summaryrefslogtreecommitdiff
path: root/fs/afs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/afs')
-rw-r--r--fs/afs/Kconfig1
-rw-r--r--fs/afs/Makefile4
-rw-r--r--fs/afs/addr_list.c254
-rw-r--r--fs/afs/addr_prefs.c533
-rw-r--r--fs/afs/afs.h9
-rw-r--r--fs/afs/afs_vl.h10
-rw-r--r--fs/afs/callback.c145
-rw-r--r--fs/afs/cell.c572
-rw-r--r--fs/afs/cm_security.c340
-rw-r--r--fs/afs/cmservice.c81
-rw-r--r--fs/afs/dir.c1169
-rw-r--r--fs/afs/dir_edit.c439
-rw-r--r--fs/afs/dir_search.c227
-rw-r--r--fs/afs/dir_silly.c19
-rw-r--r--fs/afs/dynroot.c478
-rw-r--r--fs/afs/file.c519
-rw-r--r--fs/afs/flock.c70
-rw-r--r--fs/afs/fs_operation.c198
-rw-r--r--fs/afs/fs_probe.c339
-rw-r--r--fs/afs/fsclient.c137
-rw-r--r--fs/afs/inode.c403
-rw-r--r--fs/afs/internal.h746
-rw-r--r--fs/afs/main.c27
-rw-r--r--fs/afs/misc.c38
-rw-r--r--fs/afs/mntpt.c35
-rw-r--r--fs/afs/proc.c125
-rw-r--r--fs/afs/protocol_yfs.h3
-rw-r--r--fs/afs/rotate.c529
-rw-r--r--fs/afs/rxrpc.c284
-rw-r--r--fs/afs/security.c51
-rw-r--r--fs/afs/server.c678
-rw-r--r--fs/afs/server_list.c180
-rw-r--r--fs/afs/super.c46
-rw-r--r--fs/afs/validation.c484
-rw-r--r--fs/afs/vl_alias.c83
-rw-r--r--fs/afs/vl_list.c29
-rw-r--r--fs/afs/vl_probe.c62
-rw-r--r--fs/afs/vl_rotate.c225
-rw-r--r--fs/afs/vlclient.c146
-rw-r--r--fs/afs/volume.c104
-rw-r--r--fs/afs/write.c974
-rw-r--r--fs/afs/xattr.c8
-rw-r--r--fs/afs/xdr_fs.h2
-rw-r--r--fs/afs/yfsclient.c327
44 files changed, 6748 insertions, 4385 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index fc8ba9142f2f..682bd8ec2c10 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -5,6 +5,7 @@ config AFS_FS
select AF_RXRPC
select DNS_RESOLVER
select NETFS_SUPPORT
+ select CRYPTO_KRB5
help
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index e8956b65d7ff..b49b8fe682f3 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -5,11 +5,14 @@
kafs-y := \
addr_list.o \
+ addr_prefs.o \
callback.o \
cell.o \
+ cm_security.o \
cmservice.o \
dir.o \
dir_edit.o \
+ dir_search.o \
dir_silly.o \
dynroot.o \
file.o \
@@ -27,6 +30,7 @@ kafs-y := \
server.o \
server_list.o \
super.o \
+ validation.o \
vlclient.o \
vl_alias.o \
vl_list.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index de1ae0bead3b..e941da5b6dd9 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -13,26 +13,55 @@
#include "internal.h"
#include "afs_fs.h"
+static void afs_free_addrlist(struct rcu_head *rcu)
+{
+ struct afs_addr_list *alist = container_of(rcu, struct afs_addr_list, rcu);
+ unsigned int i;
+
+ for (i = 0; i < alist->nr_addrs; i++)
+ rxrpc_kernel_put_peer(alist->addrs[i].peer);
+ trace_afs_alist(alist->debug_id, refcount_read(&alist->usage), afs_alist_trace_free);
+ kfree(alist);
+}
+
/*
* Release an address list.
*/
-void afs_put_addrlist(struct afs_addr_list *alist)
+void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
{
- if (alist && refcount_dec_and_test(&alist->usage))
- kfree_rcu(alist, rcu);
+ unsigned int debug_id;
+ bool dead;
+ int r;
+
+ if (!alist)
+ return;
+ debug_id = alist->debug_id;
+ dead = __refcount_dec_and_test(&alist->usage, &r);
+ trace_afs_alist(debug_id, r - 1, reason);
+ if (dead)
+ call_rcu(&alist->rcu, afs_free_addrlist);
+}
+
+struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
+{
+ int r;
+
+ if (alist) {
+ __refcount_inc(&alist->usage, &r);
+ trace_afs_alist(alist->debug_id, r + 1, reason);
+ }
+ return alist;
}
/*
* Allocate an address list.
*/
-struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
- unsigned short service,
- unsigned short port)
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr)
{
struct afs_addr_list *alist;
- unsigned int i;
+ static atomic_t debug_id;
- _enter("%u,%u,%u", nr, service, port);
+ _enter("%u", nr);
if (nr > AFS_MAX_ADDRESSES)
nr = AFS_MAX_ADDRESSES;
@@ -43,17 +72,8 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
refcount_set(&alist->usage, 1);
alist->max_addrs = nr;
-
- for (i = 0; i < nr; i++) {
- struct sockaddr_rxrpc *srx = &alist->addrs[i];
- srx->srx_family = AF_RXRPC;
- srx->srx_service = service;
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin6);
- srx->transport.sin6.sin6_family = AF_INET6;
- srx->transport.sin6.sin6_port = htons(port);
- }
-
+ alist->debug_id = atomic_inc_return(&debug_id);
+ trace_afs_alist(alist->debug_id, 1, afs_alist_trace_alloc);
return alist;
}
@@ -126,7 +146,7 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
if (!vllist->servers[0].server)
goto error_vl;
- alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+ alist = afs_alloc_addrlist(nr);
if (!alist)
goto error;
@@ -197,9 +217,11 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
}
if (family == AF_INET)
- afs_merge_fs_addr4(alist, x[0], xport);
+ ret = afs_merge_fs_addr4(net, alist, x[0], xport);
else
- afs_merge_fs_addr6(alist, x, xport);
+ ret = afs_merge_fs_addr6(net, alist, x, xport);
+ if (ret < 0)
+ goto error;
} while (p < end);
@@ -216,26 +238,13 @@ bad_address:
problem, p - text, (int)len, (int)len, text);
ret = -EINVAL;
error:
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
error_vl:
afs_put_vlserverlist(net, vllist);
return ERR_PTR(ret);
}
/*
- * Compare old and new address lists to see if there's been any change.
- * - How to do this in better than O(Nlog(N)) time?
- * - We don't really want to sort the address list, but would rather take the
- * list as we got it so as not to undo record rotation by the DNS server.
- */
-#if 0
-static int afs_cmp_addr_list(const struct afs_addr_list *a1,
- const struct afs_addr_list *a2)
-{
-}
-#endif
-
-/*
* Perform a DNS query for VL servers and build a up an address list.
*/
struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
@@ -271,25 +280,33 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
/*
* Merge an IPv4 entry into a fileserver address list.
*/
-void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
+int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist,
+ __be32 xdr, u16 port)
{
- struct sockaddr_rxrpc *srx;
- u32 addr = ntohl(xdr);
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
int i;
if (alist->nr_addrs >= alist->max_addrs)
- return;
+ return 0;
- for (i = 0; i < alist->nr_ipv4; i++) {
- struct sockaddr_in *a = &alist->addrs[i].transport.sin;
- u32 a_addr = ntohl(a->sin_addr.s_addr);
- u16 a_port = ntohs(a->sin_port);
+ srx.srx_family = AF_RXRPC;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin);
+ srx.transport.sin.sin_family = AF_INET;
+ srx.transport.sin.sin_port = htons(port);
+ srx.transport.sin.sin_addr.s_addr = xdr;
- if (addr == a_addr && port == a_port)
- return;
- if (addr == a_addr && port < a_port)
- break;
- if (addr < a_addr)
+ peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
+ if (!peer)
+ return -ENOMEM;
+
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ if (peer == alist->addrs[i].peer) {
+ rxrpc_kernel_put_peer(peer);
+ return 0;
+ }
+ if (peer <= alist->addrs[i].peer)
break;
}
@@ -298,38 +315,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
alist->addrs + i,
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
- srx = &alist->addrs[i];
- srx->srx_family = AF_RXRPC;
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.sin.sin_family = AF_INET;
- srx->transport.sin.sin_port = htons(port);
- srx->transport.sin.sin_addr.s_addr = xdr;
+ alist->addrs[i].peer = peer;
alist->nr_ipv4++;
alist->nr_addrs++;
+ return 0;
}
/*
* Merge an IPv6 entry into a fileserver address list.
*/
-void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
+int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist,
+ __be32 *xdr, u16 port)
{
- struct sockaddr_rxrpc *srx;
- int i, diff;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
+ int i;
if (alist->nr_addrs >= alist->max_addrs)
- return;
+ return 0;
- for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
- struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
- u16 a_port = ntohs(a->sin6_port);
+ srx.srx_family = AF_RXRPC;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(port);
+ memcpy(&srx.transport.sin6.sin6_addr, xdr, 16);
- diff = memcmp(xdr, &a->sin6_addr, 16);
- if (diff == 0 && port == a_port)
- return;
- if (diff == 0 && port < a_port)
- break;
- if (diff < 0)
+ peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
+ if (!peer)
+ return -ENOMEM;
+
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+ if (peer == alist->addrs[i].peer) {
+ rxrpc_kernel_put_peer(peer);
+ return 0;
+ }
+ if (peer <= alist->addrs[i].peer)
break;
}
@@ -337,68 +358,57 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
memmove(alist->addrs + i + 1,
alist->addrs + i,
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
-
- srx = &alist->addrs[i];
- srx->srx_family = AF_RXRPC;
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin6);
- srx->transport.sin6.sin6_family = AF_INET6;
- srx->transport.sin6.sin6_port = htons(port);
- memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
+ alist->addrs[i].peer = peer;
alist->nr_addrs++;
+ return 0;
}
/*
- * Get an address to try.
+ * Set the app data on the rxrpc peers an address list points to
*/
-bool afs_iterate_addresses(struct afs_addr_cursor *ac)
+void afs_set_peer_appdata(struct afs_server *server,
+ struct afs_addr_list *old_alist,
+ struct afs_addr_list *new_alist)
{
- unsigned long set, failed;
- int index;
-
- if (!ac->alist)
- return false;
-
- set = ac->alist->responded;
- failed = ac->alist->failed;
- _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
-
- ac->nr_iterations++;
-
- set &= ~(failed | ac->tried);
-
- if (!set)
- return false;
-
- index = READ_ONCE(ac->alist->preferred);
- if (test_bit(index, &set))
- goto selected;
+ unsigned long data = (unsigned long)server;
+ int n = 0, o = 0;
- index = __ffs(set);
-
-selected:
- ac->index = index;
- set_bit(index, &ac->tried);
- ac->responded = false;
- return true;
-}
+ if (!old_alist) {
+ /* New server. Just set all. */
+ for (; n < new_alist->nr_addrs; n++)
+ rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data);
+ return;
+ }
+ if (!new_alist) {
+ /* Dead server. Just remove all. */
+ for (; o < old_alist->nr_addrs; o++)
+ rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0);
+ return;
+ }
-/*
- * Release an address list cursor.
- */
-int afs_end_cursor(struct afs_addr_cursor *ac)
-{
- struct afs_addr_list *alist;
+ /* Walk through the two lists simultaneously, setting new peers and
+ * clearing old ones. The two lists are ordered by pointer to peer
+ * record.
+ */
+ while (n < new_alist->nr_addrs && o < old_alist->nr_addrs) {
+ struct rxrpc_peer *pn = new_alist->addrs[n].peer;
+ struct rxrpc_peer *po = old_alist->addrs[o].peer;
- alist = ac->alist;
- if (alist) {
- if (ac->responded &&
- ac->index != alist->preferred &&
- test_bit(ac->alist->preferred, &ac->tried))
- WRITE_ONCE(alist->preferred, ac->index);
- afs_put_addrlist(alist);
- ac->alist = NULL;
+ if (pn == po)
+ continue;
+ if (pn < po) {
+ rxrpc_kernel_set_peer_data(pn, data);
+ n++;
+ } else {
+ rxrpc_kernel_set_peer_data(po, 0);
+ o++;
+ }
}
- return ac->error;
+ if (n < new_alist->nr_addrs)
+ for (; n < new_alist->nr_addrs; n++)
+ rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data);
+ if (o < old_alist->nr_addrs)
+ for (; o < old_alist->nr_addrs; o++)
+ rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0);
}
diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c
new file mode 100644
index 000000000000..133736412c3d
--- /dev/null
+++ b/fs/afs/addr_prefs.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Address preferences management
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": addr_prefs: " fmt
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/seq_file.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
+{
+ return afs_net(seq_file_single_net(m));
+}
+
+/*
+ * Split a NUL-terminated string up to the first newline around spaces. The
+ * source string will be modified to have NUL-terminations inserted.
+ */
+static int afs_split_string(char **pbuf, char *strv[], unsigned int maxstrv)
+{
+ unsigned int count = 0;
+ char *p = *pbuf;
+
+ maxstrv--; /* Allow for terminal NULL */
+ for (;;) {
+ /* Skip over spaces */
+ while (isspace(*p)) {
+ if (*p == '\n') {
+ p++;
+ break;
+ }
+ p++;
+ }
+ if (!*p)
+ break;
+
+ /* Mark start of word */
+ if (count >= maxstrv) {
+ pr_warn("Too many elements in string\n");
+ return -EINVAL;
+ }
+ strv[count++] = p;
+
+ /* Skip over word */
+ while (!isspace(*p) && *p)
+ p++;
+ if (!*p)
+ break;
+
+ /* Mark end of word */
+ if (*p == '\n') {
+ *p++ = 0;
+ break;
+ }
+ *p++ = 0;
+ }
+
+ *pbuf = p;
+ strv[count] = NULL;
+ return count;
+}
+
+/*
+ * Parse an address with an optional subnet mask.
+ */
+static int afs_parse_address(char *p, struct afs_addr_preference *pref)
+{
+ const char *stop;
+ unsigned long mask, tmp;
+ char *end = p + strlen(p);
+ bool bracket = false;
+
+ if (*p == '[') {
+ p++;
+ bracket = true;
+ }
+
+#if 0
+ if (*p == '[') {
+ p++;
+ q = memchr(p, ']', end - p);
+ if (!q) {
+ pr_warn("Can't find closing ']'\n");
+ return -EINVAL;
+ }
+ } else {
+ for (q = p; q < end; q++)
+ if (*q == '/')
+ break;
+ }
+#endif
+
+ if (in4_pton(p, end - p, (u8 *)&pref->ipv4_addr, -1, &stop)) {
+ pref->family = AF_INET;
+ mask = 32;
+ } else if (in6_pton(p, end - p, (u8 *)&pref->ipv6_addr, -1, &stop)) {
+ pref->family = AF_INET6;
+ mask = 128;
+ } else {
+ pr_warn("Can't determine address family\n");
+ return -EINVAL;
+ }
+
+ p = (char *)stop;
+ if (bracket) {
+ if (*p != ']') {
+ pr_warn("Can't find closing ']'\n");
+ return -EINVAL;
+ }
+ p++;
+ }
+
+ if (*p == '/') {
+ p++;
+ tmp = simple_strtoul(p, &p, 10);
+ if (tmp > mask) {
+ pr_warn("Subnet mask too large\n");
+ return -EINVAL;
+ }
+ if (tmp == 0) {
+ pr_warn("Subnet mask too small\n");
+ return -EINVAL;
+ }
+ mask = tmp;
+ }
+
+ if (*p) {
+ pr_warn("Invalid address\n");
+ return -EINVAL;
+ }
+
+ pref->subnet_mask = mask;
+ return 0;
+}
+
+enum cmp_ret {
+ CONTINUE_SEARCH,
+ INSERT_HERE,
+ EXACT_MATCH,
+ SUBNET_MATCH,
+};
+
+/*
+ * See if a candidate address matches a listed address.
+ */
+static enum cmp_ret afs_cmp_address_pref(const struct afs_addr_preference *a,
+ const struct afs_addr_preference *b)
+{
+ int subnet = min(a->subnet_mask, b->subnet_mask);
+ const __be32 *pa, *pb;
+ u32 mask, na, nb;
+ int diff;
+
+ if (a->family != b->family)
+ return INSERT_HERE;
+
+ switch (a->family) {
+ case AF_INET6:
+ pa = a->ipv6_addr.s6_addr32;
+ pb = b->ipv6_addr.s6_addr32;
+ break;
+ case AF_INET:
+ pa = &a->ipv4_addr.s_addr;
+ pb = &b->ipv4_addr.s_addr;
+ break;
+ }
+
+ while (subnet > 32) {
+ diff = ntohl(*pa++) - ntohl(*pb++);
+ if (diff < 0)
+ return INSERT_HERE; /* a<b */
+ if (diff > 0)
+ return CONTINUE_SEARCH; /* a>b */
+ subnet -= 32;
+ }
+
+ if (subnet == 0)
+ return EXACT_MATCH;
+
+ mask = 0xffffffffU << (32 - subnet);
+ na = ntohl(*pa);
+ nb = ntohl(*pb);
+ diff = (na & mask) - (nb & mask);
+ //kdebug("diff %08x %08x %08x %d", na, nb, mask, diff);
+ if (diff < 0)
+ return INSERT_HERE; /* a<b */
+ if (diff > 0)
+ return CONTINUE_SEARCH; /* a>b */
+ if (a->subnet_mask == b->subnet_mask)
+ return EXACT_MATCH;
+ if (a->subnet_mask > b->subnet_mask)
+ return SUBNET_MATCH; /* a binds tighter than b */
+ return CONTINUE_SEARCH; /* b binds tighter than a */
+}
+
+/*
+ * Insert an address preference.
+ */
+static int afs_insert_address_pref(struct afs_addr_preference_list **_preflist,
+ struct afs_addr_preference *pref,
+ int index)
+{
+ struct afs_addr_preference_list *preflist = *_preflist, *old = preflist;
+ size_t size, max_prefs;
+
+ _enter("{%u/%u/%u},%u", preflist->ipv6_off, preflist->nr, preflist->max_prefs, index);
+
+ if (preflist->nr == 255)
+ return -ENOSPC;
+ if (preflist->nr >= preflist->max_prefs) {
+ max_prefs = preflist->max_prefs + 1;
+ size = struct_size(preflist, prefs, max_prefs);
+ size = roundup_pow_of_two(size);
+ max_prefs = min_t(size_t, (size - sizeof(*preflist)) / sizeof(*pref), 255);
+ preflist = kmalloc(size, GFP_KERNEL);
+ if (!preflist)
+ return -ENOMEM;
+ *preflist = **_preflist;
+ preflist->max_prefs = max_prefs;
+ *_preflist = preflist;
+
+ if (index < preflist->nr)
+ memcpy(preflist->prefs + index + 1, old->prefs + index,
+ sizeof(*pref) * (preflist->nr - index));
+ if (index > 0)
+ memcpy(preflist->prefs, old->prefs, sizeof(*pref) * index);
+ } else {
+ if (index < preflist->nr)
+ memmove(preflist->prefs + index + 1, preflist->prefs + index,
+ sizeof(*pref) * (preflist->nr - index));
+ }
+
+ preflist->prefs[index] = *pref;
+ preflist->nr++;
+ if (pref->family == AF_INET)
+ preflist->ipv6_off++;
+ return 0;
+}
+
+/*
+ * Add an address preference.
+ * echo "add <proto> <IP>[/<mask>] <prior>" >/proc/fs/afs/addr_prefs
+ */
+static int afs_add_address_pref(struct afs_net *net, struct afs_addr_preference_list **_preflist,
+ int argc, char **argv)
+{
+ struct afs_addr_preference_list *preflist = *_preflist;
+ struct afs_addr_preference pref;
+ enum cmp_ret cmp;
+ int ret, i, stop;
+
+ if (argc != 3) {
+ pr_warn("Wrong number of params\n");
+ return -EINVAL;
+ }
+
+ if (strcmp(argv[0], "udp") != 0) {
+ pr_warn("Unsupported protocol\n");
+ return -EINVAL;
+ }
+
+ ret = afs_parse_address(argv[1], &pref);
+ if (ret < 0)
+ return ret;
+
+ ret = kstrtou16(argv[2], 10, &pref.prio);
+ if (ret < 0) {
+ pr_warn("Invalid priority\n");
+ return ret;
+ }
+
+ if (pref.family == AF_INET) {
+ i = 0;
+ stop = preflist->ipv6_off;
+ } else {
+ i = preflist->ipv6_off;
+ stop = preflist->nr;
+ }
+
+ for (; i < stop; i++) {
+ cmp = afs_cmp_address_pref(&pref, &preflist->prefs[i]);
+ switch (cmp) {
+ case CONTINUE_SEARCH:
+ continue;
+ case INSERT_HERE:
+ case SUBNET_MATCH:
+ return afs_insert_address_pref(_preflist, &pref, i);
+ case EXACT_MATCH:
+ preflist->prefs[i].prio = pref.prio;
+ return 0;
+ }
+ }
+
+ return afs_insert_address_pref(_preflist, &pref, i);
+}
+
+/*
+ * Delete an address preference.
+ */
+static int afs_delete_address_pref(struct afs_addr_preference_list **_preflist,
+ int index)
+{
+ struct afs_addr_preference_list *preflist = *_preflist;
+
+ _enter("{%u/%u/%u},%u", preflist->ipv6_off, preflist->nr, preflist->max_prefs, index);
+
+ if (preflist->nr == 0)
+ return -ENOENT;
+
+ if (index < preflist->nr - 1)
+ memmove(preflist->prefs + index, preflist->prefs + index + 1,
+ sizeof(preflist->prefs[0]) * (preflist->nr - index - 1));
+
+ if (index < preflist->ipv6_off)
+ preflist->ipv6_off--;
+ preflist->nr--;
+ return 0;
+}
+
+/*
+ * Delete an address preference.
+ * echo "del <proto> <IP>[/<mask>]" >/proc/fs/afs/addr_prefs
+ */
+static int afs_del_address_pref(struct afs_net *net, struct afs_addr_preference_list **_preflist,
+ int argc, char **argv)
+{
+ struct afs_addr_preference_list *preflist = *_preflist;
+ struct afs_addr_preference pref;
+ enum cmp_ret cmp;
+ int ret, i, stop;
+
+ if (argc != 2) {
+ pr_warn("Wrong number of params\n");
+ return -EINVAL;
+ }
+
+ if (strcmp(argv[0], "udp") != 0) {
+ pr_warn("Unsupported protocol\n");
+ return -EINVAL;
+ }
+
+ ret = afs_parse_address(argv[1], &pref);
+ if (ret < 0)
+ return ret;
+
+ if (pref.family == AF_INET) {
+ i = 0;
+ stop = preflist->ipv6_off;
+ } else {
+ i = preflist->ipv6_off;
+ stop = preflist->nr;
+ }
+
+ for (; i < stop; i++) {
+ cmp = afs_cmp_address_pref(&pref, &preflist->prefs[i]);
+ switch (cmp) {
+ case CONTINUE_SEARCH:
+ continue;
+ case INSERT_HERE:
+ case SUBNET_MATCH:
+ return 0;
+ case EXACT_MATCH:
+ return afs_delete_address_pref(_preflist, i);
+ }
+ }
+
+ return -ENOANO;
+}
+
+/*
+ * Handle writes to /proc/fs/afs/addr_prefs
+ */
+int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size)
+{
+ struct afs_addr_preference_list *preflist, *old;
+ struct seq_file *m = file->private_data;
+ struct afs_net *net = afs_seq2net_single(m);
+ size_t psize;
+ char *argv[5];
+ int ret, argc, max_prefs;
+
+ inode_lock(file_inode(file));
+
+ /* Allocate a candidate new list and initialise it from the old. */
+ old = rcu_dereference_protected(net->address_prefs,
+ lockdep_is_held(&file_inode(file)->i_rwsem));
+
+ if (old)
+ max_prefs = old->nr + 1;
+ else
+ max_prefs = 1;
+
+ psize = struct_size(old, prefs, max_prefs);
+ psize = roundup_pow_of_two(psize);
+ max_prefs = min_t(size_t, (psize - sizeof(*old)) / sizeof(old->prefs[0]), 255);
+
+ ret = -ENOMEM;
+ preflist = kmalloc(struct_size(preflist, prefs, max_prefs), GFP_KERNEL);
+ if (!preflist)
+ goto done;
+
+ if (old)
+ memcpy(preflist, old, struct_size(preflist, prefs, old->nr));
+ else
+ memset(preflist, 0, sizeof(*preflist));
+ preflist->max_prefs = max_prefs;
+
+ do {
+ argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv));
+ if (argc < 0) {
+ ret = argc;
+ goto done;
+ }
+ if (argc < 2)
+ goto inval;
+
+ if (strcmp(argv[0], "add") == 0)
+ ret = afs_add_address_pref(net, &preflist, argc - 1, argv + 1);
+ else if (strcmp(argv[0], "del") == 0)
+ ret = afs_del_address_pref(net, &preflist, argc - 1, argv + 1);
+ else
+ goto inval;
+ if (ret < 0)
+ goto done;
+ } while (*buf);
+
+ preflist->version++;
+ rcu_assign_pointer(net->address_prefs, preflist);
+ /* Store prefs before version */
+ smp_store_release(&net->address_pref_version, preflist->version);
+ kfree_rcu(old, rcu);
+ preflist = NULL;
+ ret = 0;
+
+done:
+ kfree(preflist);
+ inode_unlock(file_inode(file));
+ _leave(" = %d", ret);
+ return ret;
+
+inval:
+ pr_warn("Invalid Command\n");
+ ret = -EINVAL;
+ goto done;
+}
+
+/*
+ * Mark the priorities on an address list if the address preferences table has
+ * changed. The caller must hold the RCU read lock.
+ */
+void afs_get_address_preferences_rcu(struct afs_net *net, struct afs_addr_list *alist)
+{
+ const struct afs_addr_preference_list *preflist =
+ rcu_dereference(net->address_prefs);
+ const struct sockaddr_in6 *sin6;
+ const struct sockaddr_in *sin;
+ const struct sockaddr *sa;
+ struct afs_addr_preference test;
+ enum cmp_ret cmp;
+ int i, j;
+
+ if (!preflist || !preflist->nr || !alist->nr_addrs ||
+ smp_load_acquire(&alist->addr_pref_version) == preflist->version)
+ return;
+
+ test.family = AF_INET;
+ test.subnet_mask = 32;
+ test.prio = 0;
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ sa = rxrpc_kernel_remote_addr(alist->addrs[i].peer);
+ sin = (const struct sockaddr_in *)sa;
+ test.ipv4_addr = sin->sin_addr;
+ for (j = 0; j < preflist->ipv6_off; j++) {
+ cmp = afs_cmp_address_pref(&test, &preflist->prefs[j]);
+ switch (cmp) {
+ case CONTINUE_SEARCH:
+ continue;
+ case INSERT_HERE:
+ break;
+ case EXACT_MATCH:
+ case SUBNET_MATCH:
+ WRITE_ONCE(alist->addrs[i].prio, preflist->prefs[j].prio);
+ break;
+ }
+ }
+ }
+
+ test.family = AF_INET6;
+ test.subnet_mask = 128;
+ test.prio = 0;
+ for (; i < alist->nr_addrs; i++) {
+ sa = rxrpc_kernel_remote_addr(alist->addrs[i].peer);
+ sin6 = (const struct sockaddr_in6 *)sa;
+ test.ipv6_addr = sin6->sin6_addr;
+ for (j = preflist->ipv6_off; j < preflist->nr; j++) {
+ cmp = afs_cmp_address_pref(&test, &preflist->prefs[j]);
+ switch (cmp) {
+ case CONTINUE_SEARCH:
+ continue;
+ case INSERT_HERE:
+ break;
+ case EXACT_MATCH:
+ case SUBNET_MATCH:
+ WRITE_ONCE(alist->addrs[i].prio, preflist->prefs[j].prio);
+ break;
+ }
+ }
+ }
+
+ smp_store_release(&alist->addr_pref_version, preflist->version);
+}
+
+/*
+ * Mark the priorities on an address list if the address preferences table has
+ * changed. Avoid taking the RCU read lock if we can.
+ */
+void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alist)
+{
+ if (!net->address_prefs ||
+ /* Load version before prefs */
+ smp_load_acquire(&net->address_pref_version) == alist->addr_pref_version)
+ return;
+
+ rcu_read_lock();
+ afs_get_address_preferences_rcu(net, alist);
+ rcu_read_unlock();
+}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 432cb4b23961..ec3db00bd081 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -10,7 +10,7 @@
#include <linux/in.h>
-#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */
+#define AFS_MAXCELLNAME 253 /* Maximum length of a cell name (DNS limited) */
#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */
#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */
#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */
@@ -19,8 +19,8 @@
#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
-#define AFS_VL_MAX_LIFESPAN (120 * HZ)
-#define AFS_PROBE_MAX_LIFESPAN (30 * HZ)
+#define AFS_VL_MAX_LIFESPAN 120
+#define AFS_PROBE_MAX_LIFESPAN 30
typedef u64 afs_volid_t;
typedef u64 afs_vnodeid_t;
@@ -165,7 +165,8 @@ struct afs_status_cb {
* AFS volume synchronisation information
*/
struct afs_volsync {
- time64_t creation; /* volume creation time */
+ time64_t creation; /* Volume creation time (or TIME64_MIN) */
+ time64_t update; /* Volume update time (or TIME64_MIN) */
};
/*
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 9c65ffb8a523..b835e25a2c02 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -13,6 +13,7 @@
#define AFS_VL_PORT 7003 /* volume location service port */
#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
+#define YFS_VL_MAXCELLNAME 256 /* Maximum length of a cell name in YFS protocol */
enum AFSVL_Operations {
VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
@@ -134,13 +135,4 @@ struct afs_uvldbentry__xdr {
__be32 spares9;
};
-struct afs_address_list {
- refcount_t usage;
- unsigned int version;
- unsigned int nr_addrs;
- struct sockaddr_rxrpc addrs[];
-};
-
-extern void afs_put_address_list(struct afs_address_list *alist);
-
#endif /* AFS_VL_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index a484fa642808..894d2bad6b6c 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -33,22 +33,20 @@ void afs_invalidate_mmap_work(struct work_struct *work)
unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
}
-void afs_server_init_callback_work(struct work_struct *work)
+static void afs_volume_init_callback(struct afs_volume *volume)
{
- struct afs_server *server = container_of(work, struct afs_server, initcb_work);
struct afs_vnode *vnode;
- struct afs_cell *cell = server->cell;
- down_read(&cell->fs_open_mmaps_lock);
+ down_read(&volume->open_mmaps_lock);
- list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) {
- if (vnode->cb_server == server) {
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- queue_work(system_unbound_wq, &vnode->cb_work);
+ list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
+ if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
+ queue_work(system_dfl_wq, &vnode->cb_work);
}
}
- up_read(&cell->fs_open_mmaps_lock);
+ up_read(&volume->open_mmaps_lock);
}
/*
@@ -57,15 +55,20 @@ void afs_server_init_callback_work(struct work_struct *work)
*/
void afs_init_callback_state(struct afs_server *server)
{
- rcu_read_lock();
- do {
- server->cb_s_break++;
- atomic_inc(&server->cell->fs_s_break);
- if (!list_empty(&server->cell->fs_open_mmaps))
- queue_work(system_unbound_wq, &server->initcb_work);
+ struct afs_server_entry *se;
- } while ((server = rcu_dereference(server->uuid_next)));
- rcu_read_unlock();
+ down_read(&server->cell->vs_lock);
+
+ list_for_each_entry(se, &server->volumes, slink) {
+ se->cb_expires_at = AFS_NO_CB_PROMISE;
+ se->volume->cb_expires_at = AFS_NO_CB_PROMISE;
+ trace_afs_cb_v_break(se->volume->vid, atomic_read(&se->volume->cb_v_break),
+ afs_cb_break_for_s_reinit);
+ if (!list_empty(&se->volume->open_mmaps))
+ afs_volume_init_callback(se->volume);
+ }
+
+ up_read(&server->cell->vs_lock);
}
/*
@@ -76,9 +79,9 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
_enter("");
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) {
vnode->cb_break++;
- vnode->cb_v_break = vnode->volume->cb_v_break;
+ vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
afs_clear_permits(vnode);
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
@@ -87,7 +90,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
if (reason != afs_cb_break_for_deleted &&
vnode->status.type == AFS_FTYPE_FILE &&
atomic_read(&vnode->cb_nr_mmap))
- queue_work(system_unbound_wq, &vnode->cb_work);
+ queue_work(system_dfl_wq, &vnode->cb_work);
trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
} else {
@@ -110,13 +113,14 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
{
struct afs_volume *volume = NULL;
struct rb_node *p;
- int seq = 0;
+ int seq = 1;
- do {
+ for (;;) {
/* Unfortunately, rbtree walking doesn't give reliable results
* under just the RCU read lock, so we have to check for
* changes.
*/
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&cell->volume_lock, &seq);
p = rcu_dereference_raw(cell->volumes.rb_node);
@@ -132,35 +136,63 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
volume = NULL;
}
- } while (need_seqretry(&cell->volume_lock, seq));
+ if (volume && afs_try_get_volume(volume, afs_volume_trace_get_callback))
+ break;
+ if (!need_seqretry(&cell->volume_lock, seq))
+ break;
+ seq |= 1; /* Want a lock next time */
+ }
done_seqretry(&cell->volume_lock, seq);
return volume;
}
/*
+ * Allow the fileserver to break callbacks at the volume-level. This is
+ * typically done when, for example, a R/W volume is snapshotted to a R/O
+ * volume (the only way to change an R/O volume). It may also, however, happen
+ * when a volserver takes control of a volume (offlining it, moving it, etc.).
+ *
+ * Every file in that volume will need to be reevaluated.
+ */
+static void afs_break_volume_callback(struct afs_server *server,
+ struct afs_volume *volume)
+ __releases(RCU)
+{
+ struct afs_server_list *slist = rcu_dereference(volume->servers);
+ unsigned int i, cb_v_break;
+
+ write_lock(&volume->cb_v_break_lock);
+
+ for (i = 0; i < slist->nr_servers; i++)
+ if (slist->servers[i].server == server)
+ slist->servers[i].cb_expires_at = AFS_NO_CB_PROMISE;
+ volume->cb_expires_at = AFS_NO_CB_PROMISE;
+
+ cb_v_break = atomic_inc_return_release(&volume->cb_v_break);
+ trace_afs_cb_v_break(volume->vid, cb_v_break, afs_cb_break_for_volume_callback);
+
+ write_unlock(&volume->cb_v_break_lock);
+ rcu_read_unlock();
+
+ if (!list_empty(&volume->open_mmaps))
+ afs_volume_init_callback(volume);
+}
+
+/*
* allow the fileserver to explicitly break one callback
* - happens when
* - the backing file is changed
* - a lock is released
*/
-static void afs_break_one_callback(struct afs_volume *volume,
+static void afs_break_one_callback(struct afs_server *server,
+ struct afs_volume *volume,
struct afs_fid *fid)
{
struct super_block *sb;
struct afs_vnode *vnode;
struct inode *inode;
- if (fid->vnode == 0 && fid->unique == 0) {
- /* The callback break applies to an entire volume. */
- write_lock(&volume->cb_v_break_lock);
- volume->cb_v_break++;
- trace_afs_cb_break(fid, volume->cb_v_break,
- afs_cb_break_for_volume_callback, false);
- write_unlock(&volume->cb_v_break_lock);
- return;
- }
-
/* See if we can find a matching inode - even an I_NEW inode needs to
* be marked as it can have its callback broken before we finish
* setting up the local inode.
@@ -187,25 +219,35 @@ static void afs_break_some_callbacks(struct afs_server *server,
afs_volid_t vid = cbb->fid.vid;
size_t i;
+ rcu_read_lock();
volume = afs_lookup_volume_rcu(server->cell, vid);
+ if (cbb->fid.vnode == 0 && cbb->fid.unique == 0) {
+ afs_break_volume_callback(server, volume);
+ *_count -= 1;
+ if (*_count)
+ memmove(cbb, cbb + 1, sizeof(*cbb) * *_count);
+ } else {
+ /* TODO: Find all matching volumes if we couldn't match the server and
+ * break them anyway.
+ */
- /* TODO: Find all matching volumes if we couldn't match the server and
- * break them anyway.
- */
-
- for (i = *_count; i > 0; cbb++, i--) {
- if (cbb->fid.vid == vid) {
- _debug("- Fid { vl=%08llx n=%llu u=%u }",
- cbb->fid.vid,
- cbb->fid.vnode,
- cbb->fid.unique);
- --*_count;
- if (volume)
- afs_break_one_callback(volume, &cbb->fid);
- } else {
- *residue++ = *cbb;
+ for (i = *_count; i > 0; cbb++, i--) {
+ if (cbb->fid.vid == vid) {
+ _debug("- Fid { vl=%08llx n=%llu u=%u }",
+ cbb->fid.vid,
+ cbb->fid.vnode,
+ cbb->fid.unique);
+ --*_count;
+ if (volume)
+ afs_break_one_callback(server, volume, &cbb->fid);
+ } else {
+ *residue++ = *cbb;
+ }
}
+ rcu_read_unlock();
}
+
+ afs_put_volume(volume, afs_volume_trace_put_callback);
}
/*
@@ -218,11 +260,6 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
ASSERT(server != NULL);
- rcu_read_lock();
-
while (count > 0)
afs_break_some_callbacks(server, callbacks, &count);
-
- rcu_read_unlock();
- return;
}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 988c2ac7cece..71c10a05cebe 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,8 +20,9 @@ static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
static atomic_t cell_debug_id;
-static void afs_queue_cell_manager(struct afs_net *);
-static void afs_manage_cell_work(struct work_struct *);
+static void afs_cell_timer(struct timer_list *timer);
+static void afs_destroy_cell_work(struct work_struct *work);
+static void afs_manage_cell_work(struct work_struct *work);
static void afs_dec_cells_outstanding(struct afs_net *net)
{
@@ -29,19 +30,11 @@ static void afs_dec_cells_outstanding(struct afs_net *net)
wake_up_var(&net->cells_outstanding);
}
-/*
- * Set the cell timer to fire after a given delay, assuming it's not already
- * set for an earlier time.
- */
-static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
+static void afs_set_cell_state(struct afs_cell *cell, enum afs_cell_state state)
{
- if (net->live) {
- atomic_inc(&net->cells_outstanding);
- if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
- afs_dec_cells_outstanding(net);
- } else {
- afs_queue_cell_manager(net);
- }
+ smp_store_release(&cell->state, state); /* Commit cell changes before state */
+ smp_wmb(); /* Set cell state before task state */
+ wake_up_var(&cell->state);
}
/*
@@ -64,7 +57,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
return ERR_PTR(-ENAMETOOLONG);
if (!name) {
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell,
+ lockdep_is_held(&net->cells_lock));
if (!cell)
return ERR_PTR(-EDESTADDRREQ);
goto found;
@@ -115,7 +109,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
const char *name, unsigned int namelen,
const char *addresses)
{
- struct afs_vlserver_list *vllist;
+ struct afs_vlserver_list *vllist = NULL;
struct afs_cell *cell;
int i, ret;
@@ -146,28 +140,37 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
return ERR_PTR(-ENOMEM);
}
- cell->name = kmalloc(namelen + 1, GFP_KERNEL);
+ /* Allocate the cell name and the key name in one go. */
+ cell->name = kmalloc(1 + namelen + 1 +
+ 4 + namelen + 1, GFP_KERNEL);
if (!cell->name) {
kfree(cell);
return ERR_PTR(-ENOMEM);
}
- cell->net = net;
+ cell->name[0] = '.';
+ cell->name++;
cell->name_len = namelen;
for (i = 0; i < namelen; i++)
cell->name[i] = tolower(name[i]);
- cell->name[i] = 0;
+ cell->name[i++] = 0;
+
+ cell->key_desc = cell->name + i;
+ memcpy(cell->key_desc, "afs@", 4);
+ memcpy(cell->key_desc + 4, cell->name, cell->name_len + 1);
+ cell->net = net;
refcount_set(&cell->ref, 1);
atomic_set(&cell->active, 0);
+ INIT_WORK(&cell->destroyer, afs_destroy_cell_work);
INIT_WORK(&cell->manager, afs_manage_cell_work);
+ timer_setup(&cell->management_timer, afs_cell_timer, 0);
+ init_rwsem(&cell->vs_lock);
cell->volumes = RB_ROOT;
INIT_HLIST_HEAD(&cell->proc_volumes);
seqlock_init(&cell->volume_lock);
cell->fs_servers = RB_ROOT;
- seqlock_init(&cell->fs_lock);
- INIT_LIST_HEAD(&cell->fs_open_mmaps);
- init_rwsem(&cell->fs_open_mmaps_lock);
+ init_rwsem(&cell->fs_lock);
rwlock_init(&cell->vl_servers_lock);
cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
@@ -180,6 +183,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
VL_SERVICE, AFS_VL_PORT);
if (IS_ERR(vllist)) {
ret = PTR_ERR(vllist);
+ vllist = NULL;
goto parse_failed;
}
@@ -202,7 +206,13 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
cell->dns_status = vllist->status;
smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
atomic_inc(&net->cells_outstanding);
+ ret = idr_alloc_cyclic(&net->cells_dyn_ino, cell,
+ 2, INT_MAX / 2, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
+ cell->dynroot_ino = ret;
cell->debug_id = atomic_inc_return(&cell_debug_id);
+
trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc);
_leave(" = %p", cell);
@@ -212,7 +222,8 @@ parse_failed:
if (ret == -EINVAL)
printk(KERN_ERR "kAFS: bad VL server IP address\n");
error:
- kfree(cell->name);
+ afs_put_vlserverlist(cell->net, vllist);
+ kfree(cell->name - 1);
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -224,7 +235,8 @@ error:
* @name: The name of the cell.
* @namesz: The strlen of the cell name.
* @vllist: A colon/comma separated list of numeric IP addresses or NULL.
- * @excl: T if an error should be given if the cell name already exists.
+ * @reason: The reason we're doing the lookup
+ * @trace: The reason to be logged if the lookup is successful.
*
* Look up a cell record by name and query the DNS for VL server addresses if
* needed. Note that that actual DNS query is punted off to the manager thread
@@ -233,19 +245,27 @@ error:
*/
struct afs_cell *afs_lookup_cell(struct afs_net *net,
const char *name, unsigned int namesz,
- const char *vllist, bool excl)
+ const char *vllist,
+ enum afs_lookup_cell_for reason,
+ enum afs_cell_trace trace)
{
struct afs_cell *cell, *candidate, *cursor;
struct rb_node *parent, **pp;
enum afs_cell_state state;
int ret, n;
- _enter("%s,%s", name, vllist);
+ _enter("%s,%s,%u", name, vllist, reason);
- if (!excl) {
- cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup);
- if (!IS_ERR(cell))
+ if (reason != AFS_LOOKUP_CELL_PRELOAD) {
+ cell = afs_find_cell(net, name, namesz, trace);
+ if (!IS_ERR(cell)) {
+ if (reason == AFS_LOOKUP_CELL_DYNROOT)
+ goto no_wait;
+ if (cell->state == AFS_CELL_SETTING_UP ||
+ cell->state == AFS_CELL_UNLOOKED)
+ goto lookup_cell;
goto wait_for_cell;
+ }
}
/* Assume we're probably going to create a cell and preallocate and
@@ -286,29 +306,74 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
cell = candidate;
candidate = NULL;
- atomic_set(&cell->active, 2);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert);
+ afs_use_cell(cell, trace);
rb_link_node_rcu(&cell->net_node, parent, pp);
rb_insert_color(&cell->net_node, &net->cells);
up_write(&net->cells_lock);
- afs_queue_cell(cell, afs_cell_trace_get_queue_new);
+lookup_cell:
+ if (reason != AFS_LOOKUP_CELL_PRELOAD &&
+ reason != AFS_LOOKUP_CELL_ROOTCELL) {
+ set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
+ afs_queue_cell(cell, afs_cell_trace_queue_new);
+ }
wait_for_cell:
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active),
- afs_cell_trace_wait);
- _debug("wait_for_cell");
- wait_var_event(&cell->state,
- ({
- state = smp_load_acquire(&cell->state); /* vs error */
- state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED;
- }));
+ state = smp_load_acquire(&cell->state); /* vs error */
+ switch (state) {
+ case AFS_CELL_ACTIVE:
+ case AFS_CELL_DEAD:
+ break;
+ case AFS_CELL_UNLOOKED:
+ default:
+ if (reason == AFS_LOOKUP_CELL_PRELOAD ||
+ reason == AFS_LOOKUP_CELL_ROOTCELL)
+ break;
+ _debug("wait_for_cell");
+ afs_see_cell(cell, afs_cell_trace_wait);
+ wait_var_event(&cell->state,
+ ({
+ state = smp_load_acquire(&cell->state); /* vs error */
+ state == AFS_CELL_ACTIVE || state == AFS_CELL_DEAD;
+ }));
+ _debug("waited_for_cell %d %d", cell->state, cell->error);
+ }
+no_wait:
/* Check the state obtained from the wait check. */
- if (state == AFS_CELL_REMOVED) {
+ state = smp_load_acquire(&cell->state); /* vs error */
+ if (state == AFS_CELL_DEAD) {
ret = cell->error;
goto error;
}
+ if (state == AFS_CELL_ACTIVE) {
+ switch (cell->dns_status) {
+ case DNS_LOOKUP_NOT_DONE:
+ if (cell->dns_source == DNS_RECORD_FROM_CONFIG) {
+ ret = 0;
+ break;
+ }
+ fallthrough;
+ default:
+ ret = -EIO;
+ goto error;
+ case DNS_LOOKUP_GOOD:
+ case DNS_LOOKUP_GOOD_WITH_BAD:
+ ret = 0;
+ break;
+ case DNS_LOOKUP_GOT_NOT_FOUND:
+ ret = -ENOENT;
+ goto error;
+ case DNS_LOOKUP_BAD:
+ ret = -EREMOTEIO;
+ goto error;
+ case DNS_LOOKUP_GOT_LOCAL_FAILURE:
+ case DNS_LOOKUP_GOT_TEMP_FAILURE:
+ case DNS_LOOKUP_GOT_NS_FAILURE:
+ ret = -EDESTADDRREQ;
+ goto error;
+ }
+ }
_leave(" = %p [cell]", cell);
return cell;
@@ -316,10 +381,10 @@ wait_for_cell:
cell_already_exists:
_debug("cell exists");
cell = cursor;
- if (excl) {
+ if (reason == AFS_LOOKUP_CELL_PRELOAD) {
ret = -EEXIST;
} else {
- afs_use_cell(cursor, afs_cell_trace_use_lookup);
+ afs_use_cell(cursor, trace);
ret = 0;
}
up_write(&net->cells_lock);
@@ -329,7 +394,7 @@ cell_already_exists:
goto wait_for_cell;
goto error_noput;
error:
- afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup);
+ afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_error);
error_noput:
_leave(" = %d [error]", ret);
return ERR_PTR(ret);
@@ -366,8 +431,18 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
len = cp - rootcell;
}
- /* allocate a cell record for the root cell */
- new_root = afs_lookup_cell(net, rootcell, len, vllist, false);
+ if (len == 0 || !rootcell[0] || rootcell[0] == '.' || rootcell[len - 1] == '.')
+ return -EINVAL;
+ if (memchr(rootcell, '/', len))
+ return -EINVAL;
+ cp = strstr(rootcell, "..");
+ if (cp && cp < rootcell + len)
+ return -EINVAL;
+
+ /* allocate a cell record for the root/workstation cell */
+ new_root = afs_lookup_cell(net, rootcell, len, vllist,
+ AFS_LOOKUP_CELL_ROOTCELL,
+ afs_cell_trace_use_lookup_ws);
if (IS_ERR(new_root)) {
_leave(" = %ld", PTR_ERR(new_root));
return PTR_ERR(new_root);
@@ -378,12 +453,11 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
/* install the new cell */
down_write(&net->cells_lock);
- afs_see_cell(new_root, afs_cell_trace_see_ws);
- old_root = net->ws_cell;
- net->ws_cell = new_root;
+ old_root = rcu_replace_pointer(net->ws_cell, new_root,
+ lockdep_is_held(&net->cells_lock));
up_write(&net->cells_lock);
- afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws);
+ afs_unuse_cell(old_root, afs_cell_trace_unuse_ws);
_leave(" = 0");
return 0;
}
@@ -409,10 +483,12 @@ static int afs_update_cell(struct afs_cell *cell)
if (ret == -ENOMEM)
goto out_wake;
- ret = -ENOMEM;
vllist = afs_alloc_vlserver_list(0);
- if (!vllist)
+ if (!vllist) {
+ if (ret >= 0)
+ ret = -ENOMEM;
goto out_wake;
+ }
switch (ret) {
case -ENODATA:
@@ -499,39 +575,24 @@ static void afs_cell_destroy(struct rcu_head *rcu)
trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free);
afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers));
- afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias);
+ afs_unuse_cell(cell->alias_of, afs_cell_trace_unuse_alias);
key_put(cell->anonymous_key);
- kfree(cell->name);
+ idr_remove(&net->cells_dyn_ino, cell->dynroot_ino);
+ kfree(cell->name - 1);
kfree(cell);
afs_dec_cells_outstanding(net);
_leave(" [destroyed]");
}
-/*
- * Queue the cell manager.
- */
-static void afs_queue_cell_manager(struct afs_net *net)
-{
- int outstanding = atomic_inc_return(&net->cells_outstanding);
-
- _enter("%d", outstanding);
-
- if (!queue_work(afs_wq, &net->cells_manager))
- afs_dec_cells_outstanding(net);
-}
-
-/*
- * Cell management timer. We have an increment on cells_outstanding that we
- * need to pass along to the work item.
- */
-void afs_cells_timer(struct timer_list *timer)
+static void afs_destroy_cell_work(struct work_struct *work)
{
- struct afs_net *net = container_of(timer, struct afs_net, cells_timer);
+ struct afs_cell *cell = container_of(work, struct afs_cell, destroyer);
- _enter("");
- if (!queue_work(afs_wq, &net->cells_manager))
- afs_dec_cells_outstanding(net);
+ afs_see_cell(cell, afs_cell_trace_destroy);
+ timer_delete_sync(&cell->management_timer);
+ cancel_work_sync(&cell->manager);
+ call_rcu(&cell->rcu, afs_cell_destroy);
}
/*
@@ -563,7 +624,7 @@ void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
if (zero) {
a = atomic_read(&cell->active);
WARN(a != 0, "Cell active count %u > 0\n", a);
- call_rcu(&cell->rcu, afs_cell_destroy);
+ WARN_ON(!queue_work(afs_wq, &cell->destroyer));
}
}
}
@@ -575,10 +636,9 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
int r, a;
- r = refcount_read(&cell->ref);
- WARN_ON(r == 0);
+ __refcount_inc(&cell->ref, &r);
a = atomic_inc_return(&cell->active);
- trace_afs_cell(cell->debug_id, r, a, reason);
+ trace_afs_cell(cell->debug_id, r + 1, a, reason);
return cell;
}
@@ -586,10 +646,11 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
* Record a cell becoming less active. When the active counter reaches 1, it
* is scheduled for destruction, but may get reactivated.
*/
-void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason)
+void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
unsigned int debug_id;
time64_t now, expire_delay;
+ bool zero;
int r, a;
if (!cell)
@@ -604,13 +665,15 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
expire_delay = afs_cell_gc_delay;
debug_id = cell->debug_id;
- r = refcount_read(&cell->ref);
a = atomic_dec_return(&cell->active);
- trace_afs_cell(debug_id, r, a, reason);
- WARN_ON(a == 0);
- if (a == 1)
+ if (!a)
/* 'cell' may now be garbage collected. */
- afs_set_cell_timer(net, expire_delay);
+ afs_set_cell_timer(cell, expire_delay);
+
+ zero = __refcount_dec_and_test(&cell->ref, &r);
+ trace_afs_cell(debug_id, r - 1, a, reason);
+ if (zero)
+ WARN_ON(!queue_work(afs_wq, &cell->destroyer));
}
/*
@@ -630,36 +693,27 @@ void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason)
*/
void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
- afs_get_cell(cell, reason);
- if (!queue_work(afs_wq, &cell->manager))
- afs_put_cell(cell, afs_cell_trace_put_queue_fail);
+ queue_work(afs_wq, &cell->manager);
}
/*
- * Allocate a key to use as a placeholder for anonymous user security.
+ * Cell-specific management timer.
*/
-static int afs_alloc_anon_key(struct afs_cell *cell)
+static void afs_cell_timer(struct timer_list *timer)
{
- struct key *key;
- char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp;
-
- /* Create a key to represent an anonymous user. */
- memcpy(keyname, "afs@", 4);
- dp = keyname + 4;
- cp = cell->name;
- do {
- *dp++ = tolower(*cp);
- } while (*cp++);
+ struct afs_cell *cell = container_of(timer, struct afs_cell, management_timer);
- key = rxrpc_get_null_key(keyname);
- if (IS_ERR(key))
- return PTR_ERR(key);
-
- cell->anonymous_key = key;
+ afs_see_cell(cell, afs_cell_trace_see_mgmt_timer);
+ if (refcount_read(&cell->ref) > 0 && cell->net->live)
+ queue_work(afs_wq, &cell->manager);
+}
- _debug("anon key %p{%x}",
- cell->anonymous_key, key_serial(cell->anonymous_key));
- return 0;
+/*
+ * Set/reduce the cell timer.
+ */
+void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs)
+{
+ timer_reduce(&cell->management_timer, jiffies + delay_secs * HZ);
}
/*
@@ -671,12 +725,6 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
struct afs_cell *pcell;
int ret;
- if (!cell->anonymous_key) {
- ret = afs_alloc_anon_key(cell);
- if (ret < 0)
- return ret;
- }
-
ret = afs_proc_cell_setup(cell);
if (ret < 0)
return ret;
@@ -694,7 +742,6 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
if (cell->proc_link.next)
cell->proc_link.next->pprev = &cell->proc_link.next;
- afs_dynroot_mkdir(net, cell);
mutex_unlock(&net->proc_cells_lock);
return 0;
}
@@ -709,242 +756,167 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
afs_proc_cell_remove(cell);
mutex_lock(&net->proc_cells_lock);
- hlist_del_rcu(&cell->proc_link);
- afs_dynroot_rmdir(net, cell);
+ if (!hlist_unhashed(&cell->proc_link))
+ hlist_del_rcu(&cell->proc_link);
mutex_unlock(&net->proc_cells_lock);
_leave("");
}
+static bool afs_has_cell_expired(struct afs_cell *cell, time64_t *_next_manage)
+{
+ const struct afs_vlserver_list *vllist;
+ time64_t expire_at = cell->last_inactive;
+ time64_t now = ktime_get_real_seconds();
+
+ if (atomic_read(&cell->active))
+ return false;
+ if (!cell->net->live)
+ return true;
+
+ vllist = rcu_dereference_protected(cell->vl_servers, true);
+ if (vllist && vllist->nr_servers > 0)
+ expire_at += afs_cell_gc_delay;
+
+ if (expire_at <= now)
+ return true;
+ if (expire_at < *_next_manage)
+ *_next_manage = expire_at;
+ return false;
+}
+
/*
* Manage a cell record, initialising and destroying it, maintaining its DNS
* records.
*/
-static void afs_manage_cell(struct afs_cell *cell)
+static bool afs_manage_cell(struct afs_cell *cell)
{
struct afs_net *net = cell->net;
- int ret, active;
+ time64_t next_manage = TIME64_MAX;
+ int ret;
_enter("%s", cell->name);
-again:
_debug("state %u", cell->state);
switch (cell->state) {
- case AFS_CELL_INACTIVE:
- case AFS_CELL_FAILED:
- down_write(&net->cells_lock);
- active = 1;
- if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) {
- rb_erase(&cell->net_node, &net->cells);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0,
- afs_cell_trace_unuse_delete);
- smp_store_release(&cell->state, AFS_CELL_REMOVED);
- }
- up_write(&net->cells_lock);
- if (cell->state == AFS_CELL_REMOVED) {
- wake_up_var(&cell->state);
- goto final_destruction;
- }
- if (cell->state == AFS_CELL_FAILED)
- goto done;
- smp_store_release(&cell->state, AFS_CELL_UNSET);
- wake_up_var(&cell->state);
- goto again;
-
- case AFS_CELL_UNSET:
- smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
- wake_up_var(&cell->state);
- goto again;
-
- case AFS_CELL_ACTIVATING:
- ret = afs_activate_cell(net, cell);
- if (ret < 0)
- goto activation_failed;
+ case AFS_CELL_SETTING_UP:
+ goto set_up_cell;
+ case AFS_CELL_UNLOOKED:
+ case AFS_CELL_ACTIVE:
+ goto cell_is_active;
+ case AFS_CELL_REMOVING:
+ WARN_ON_ONCE(1);
+ return false;
+ case AFS_CELL_DEAD:
+ return false;
+ default:
+ _debug("bad state %u", cell->state);
+ WARN_ON_ONCE(1); /* Unhandled state */
+ return false;
+ }
- smp_store_release(&cell->state, AFS_CELL_ACTIVE);
- wake_up_var(&cell->state);
- goto again;
+set_up_cell:
+ ret = afs_activate_cell(net, cell);
+ if (ret < 0) {
+ cell->error = ret;
+ goto remove_cell;
+ }
- case AFS_CELL_ACTIVE:
- if (atomic_read(&cell->active) > 1) {
- if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
- ret = afs_update_cell(cell);
- if (ret < 0)
- cell->error = ret;
- }
- goto done;
- }
- smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
- wake_up_var(&cell->state);
- goto again;
+ afs_set_cell_state(cell, AFS_CELL_UNLOOKED);
- case AFS_CELL_DEACTIVATING:
- if (atomic_read(&cell->active) > 1)
- goto reverse_deactivation;
- afs_deactivate_cell(net, cell);
- smp_store_release(&cell->state, AFS_CELL_INACTIVE);
- wake_up_var(&cell->state);
- goto again;
+cell_is_active:
+ if (afs_has_cell_expired(cell, &next_manage))
+ goto remove_cell;
- case AFS_CELL_REMOVED:
- goto done;
+ if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
+ ret = afs_update_cell(cell);
+ if (ret < 0)
+ cell->error = ret;
+ if (cell->state == AFS_CELL_UNLOOKED)
+ afs_set_cell_state(cell, AFS_CELL_ACTIVE);
+ }
- default:
- break;
+ if (next_manage < TIME64_MAX && cell->net->live) {
+ time64_t now = ktime_get_real_seconds();
+
+ if (next_manage - now <= 0)
+ afs_queue_cell(cell, afs_cell_trace_queue_again);
+ else
+ afs_set_cell_timer(cell, next_manage - now);
}
- _debug("bad state %u", cell->state);
- BUG(); /* Unhandled state */
+ _leave(" [done %u]", cell->state);
+ return false;
-activation_failed:
- cell->error = ret;
- afs_deactivate_cell(net, cell);
+remove_cell:
+ down_write(&net->cells_lock);
- smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
- wake_up_var(&cell->state);
- goto again;
+ if (atomic_read(&cell->active)) {
+ up_write(&net->cells_lock);
+ goto cell_is_active;
+ }
-reverse_deactivation:
- smp_store_release(&cell->state, AFS_CELL_ACTIVE);
- wake_up_var(&cell->state);
- _leave(" [deact->act]");
- return;
+ /* Make sure that the expiring server records are going to see the fact
+ * that the cell is caput.
+ */
+ afs_set_cell_state(cell, AFS_CELL_REMOVING);
-done:
- _leave(" [done %u]", cell->state);
- return;
+ afs_deactivate_cell(net, cell);
+ afs_purge_servers(cell);
+
+ rb_erase(&cell->net_node, &net->cells);
+ afs_see_cell(cell, afs_cell_trace_unuse_delete);
+ up_write(&net->cells_lock);
-final_destruction:
/* The root volume is pinning the cell */
- afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root);
+ afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root);
cell->root_volume = NULL;
- afs_put_cell(cell, afs_cell_trace_put_destroy);
+
+ afs_set_cell_state(cell, AFS_CELL_DEAD);
+ return true;
}
static void afs_manage_cell_work(struct work_struct *work)
{
struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+ bool final_put;
- afs_manage_cell(cell);
- afs_put_cell(cell, afs_cell_trace_put_queue_work);
+ afs_see_cell(cell, afs_cell_trace_manage);
+ final_put = afs_manage_cell(cell);
+ afs_see_cell(cell, afs_cell_trace_managed);
+ if (final_put)
+ afs_put_cell(cell, afs_cell_trace_put_final);
}
/*
- * Manage the records of cells known to a network namespace. This includes
- * updating the DNS records and garbage collecting unused cells that were
- * automatically added.
- *
- * Note that constructed cell records may only be removed from net->cells by
- * this work item, so it is safe for this work item to stash a cursor pointing
- * into the tree and then return to caller (provided it skips cells that are
- * still under construction).
- *
- * Note also that we were given an increment on net->cells_outstanding by
- * whoever queued us that we need to deal with before returning.
+ * Purge in-memory cell database.
*/
-void afs_manage_cells(struct work_struct *work)
+void afs_cell_purge(struct afs_net *net)
{
- struct afs_net *net = container_of(work, struct afs_net, cells_manager);
+ struct afs_cell *ws;
struct rb_node *cursor;
- time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
- bool purging = !net->live;
_enter("");
- /* Trawl the cell database looking for cells that have expired from
- * lack of use and cells whose DNS results have expired and dispatch
- * their managers.
- */
- down_read(&net->cells_lock);
+ down_write(&net->cells_lock);
+ ws = rcu_replace_pointer(net->ws_cell, NULL,
+ lockdep_is_held(&net->cells_lock));
+ up_write(&net->cells_lock);
+ afs_unuse_cell(ws, afs_cell_trace_unuse_ws);
+ _debug("kick cells");
+ down_read(&net->cells_lock);
for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
- struct afs_cell *cell =
- rb_entry(cursor, struct afs_cell, net_node);
- unsigned active;
- bool sched_cell = false;
-
- active = atomic_read(&cell->active);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
- active, afs_cell_trace_manage);
-
- ASSERTCMP(active, >=, 1);
-
- if (purging) {
- if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) {
- active = atomic_dec_return(&cell->active);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
- active, afs_cell_trace_unuse_pin);
- }
- }
+ struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node);
- if (active == 1) {
- struct afs_vlserver_list *vllist;
- time64_t expire_at = cell->last_inactive;
-
- read_lock(&cell->vl_servers_lock);
- vllist = rcu_dereference_protected(
- cell->vl_servers,
- lockdep_is_held(&cell->vl_servers_lock));
- if (vllist->nr_servers > 0)
- expire_at += afs_cell_gc_delay;
- read_unlock(&cell->vl_servers_lock);
- if (purging || expire_at <= now)
- sched_cell = true;
- else if (expire_at < next_manage)
- next_manage = expire_at;
- }
+ afs_see_cell(cell, afs_cell_trace_purge);
- if (!purging) {
- if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags))
- sched_cell = true;
- }
+ if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+ afs_unuse_cell(cell, afs_cell_trace_unuse_pin);
- if (sched_cell)
- afs_queue_cell(cell, afs_cell_trace_get_queue_manage);
+ afs_queue_cell(cell, afs_cell_trace_queue_purge);
}
-
up_read(&net->cells_lock);
- /* Update the timer on the way out. We have to pass an increment on
- * cells_outstanding in the namespace that we are in to the timer or
- * the work scheduler.
- */
- if (!purging && next_manage < TIME64_MAX) {
- now = ktime_get_real_seconds();
-
- if (next_manage - now <= 0) {
- if (queue_work(afs_wq, &net->cells_manager))
- atomic_inc(&net->cells_outstanding);
- } else {
- afs_set_cell_timer(net, next_manage - now);
- }
- }
-
- afs_dec_cells_outstanding(net);
- _leave(" [%d]", atomic_read(&net->cells_outstanding));
-}
-
-/*
- * Purge in-memory cell database.
- */
-void afs_cell_purge(struct afs_net *net)
-{
- struct afs_cell *ws;
-
- _enter("");
-
- down_write(&net->cells_lock);
- ws = net->ws_cell;
- net->ws_cell = NULL;
- up_write(&net->cells_lock);
- afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws);
-
- _debug("del timer");
- if (del_timer_sync(&net->cells_timer))
- atomic_dec(&net->cells_outstanding);
-
- _debug("kick mgr");
- afs_queue_cell_manager(net);
-
_debug("wait");
wait_var_event(&net->cells_outstanding,
!atomic_read(&net->cells_outstanding));
diff --git a/fs/afs/cm_security.c b/fs/afs/cm_security.c
new file mode 100644
index 000000000000..edcbd249d202
--- /dev/null
+++ b/fs/afs/cm_security.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Cache manager security.
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <crypto/krb5.h>
+#include "internal.h"
+#include "afs_cm.h"
+#include "afs_fs.h"
+#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c
+#define xdr_round_up(x) (round_up((x), sizeof(__be32)))
+#define xdr_len_object(x) (4 + round_up((x), sizeof(__be32)))
+
+#ifdef CONFIG_RXGK
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+ struct afs_server *server);
+#endif
+
+/*
+ * Respond to an RxGK challenge, adding appdata.
+ */
+static int afs_respond_to_challenge(struct sk_buff *challenge)
+{
+#ifdef CONFIG_RXGK
+ struct krb5_buffer appdata = {};
+ struct afs_server *server;
+#endif
+ struct rxrpc_peer *peer;
+ unsigned long peer_data;
+ u16 service_id;
+ u8 security_index;
+
+ rxrpc_kernel_query_challenge(challenge, &peer, &peer_data,
+ &service_id, &security_index);
+
+ _enter("%u,%u", service_id, security_index);
+
+ switch (service_id) {
+ /* We don't send CM_SERVICE RPCs, so don't expect a challenge
+ * therefrom.
+ */
+ case FS_SERVICE:
+ case VL_SERVICE:
+ case YFS_FS_SERVICE:
+ case YFS_VL_SERVICE:
+ break;
+ default:
+ pr_warn("Can't respond to unknown challenge %u:%u",
+ service_id, security_index);
+ return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+ afs_abort_unsupported_sec_class);
+ }
+
+ switch (security_index) {
+#ifdef CONFIG_RXKAD
+ case RXRPC_SECURITY_RXKAD:
+ return rxkad_kernel_respond_to_challenge(challenge);
+#endif
+
+#ifdef CONFIG_RXGK
+ case RXRPC_SECURITY_RXGK:
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+
+ case RXRPC_SECURITY_YFS_RXGK:
+ switch (service_id) {
+ case FS_SERVICE:
+ case YFS_FS_SERVICE:
+ server = (struct afs_server *)peer_data;
+ if (!server->cm_rxgk_appdata.data) {
+ mutex_lock(&server->cm_token_lock);
+ if (!server->cm_rxgk_appdata.data)
+ afs_create_yfs_cm_token(challenge, server);
+ mutex_unlock(&server->cm_token_lock);
+ }
+ if (server->cm_rxgk_appdata.data)
+ appdata = server->cm_rxgk_appdata;
+ break;
+ }
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+#endif
+
+ default:
+ return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+ afs_abort_unsupported_sec_class);
+ }
+}
+
+/*
+ * Process the OOB message queue, processing challenge packets.
+ */
+void afs_process_oob_queue(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, rx_oob_work);
+ struct sk_buff *oob;
+ enum rxrpc_oob_type type;
+
+ while ((oob = rxrpc_kernel_dequeue_oob(net->socket, &type))) {
+ switch (type) {
+ case RXRPC_OOB_CHALLENGE:
+ afs_respond_to_challenge(oob);
+ break;
+ }
+ rxrpc_kernel_free_oob(oob);
+ }
+}
+
+#ifdef CONFIG_RXGK
+/*
+ * Create a securities keyring for the cache manager and attach a key to it for
+ * the RxGK tokens we want to use to secure the callback connection back from
+ * the fileserver.
+ */
+int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+ const struct krb5_enctype *krb5;
+ struct key *ring;
+ key_ref_t key;
+ char K0[32], *desc;
+ int ret;
+
+ ring = keyring_alloc("kafs",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ KEY_POS_SEARCH | KEY_POS_WRITE |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(ring))
+ return PTR_ERR(ring);
+
+ ret = rxrpc_sock_set_security_keyring(socket->sk, ring);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOPKG;
+ krb5 = crypto_krb5_find_enctype(KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96);
+ if (!krb5)
+ goto out;
+
+ if (WARN_ON_ONCE(krb5->key_len > sizeof(K0)))
+ goto out;
+
+ ret = -ENOMEM;
+ desc = kasprintf(GFP_KERNEL, "%u:%u:%u:%u",
+ YFS_CM_SERVICE, RXRPC_SECURITY_YFS_RXGK, 1, krb5->etype);
+ if (!desc)
+ goto out;
+
+ wait_for_random_bytes();
+ get_random_bytes(K0, krb5->key_len);
+
+ key = key_create(make_key_ref(ring, true),
+ "rxrpc_s", desc,
+ K0, krb5->key_len,
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ kfree(desc);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto out;
+ }
+
+ net->fs_cm_token_key = key_ref_to_ptr(key);
+ ret = 0;
+out:
+ key_put(ring);
+ return ret;
+}
+
+/*
+ * Create an YFS RxGK GSS token to use as a ticket to the specified fileserver.
+ */
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+ struct afs_server *server)
+{
+ const struct krb5_enctype *conn_krb5, *token_krb5;
+ const struct krb5_buffer *token_key;
+ struct crypto_aead *aead;
+ struct scatterlist sg;
+ struct afs_net *net = server->cell->net;
+ const struct key *key = net->fs_cm_token_key;
+ size_t keysize, uuidsize, authsize, toksize, encsize, contsize, adatasize, offset;
+ __be32 caps[1] = {
+ [0] = htonl(AFS_CAP_ERROR_TRANSLATION),
+ };
+ __be32 *xdr;
+ void *appdata, *K0, *encbase;
+ u32 enctype;
+ int ret;
+
+ if (!key)
+ return -ENOKEY;
+
+ /* Assume that the fileserver is happy to use the same encoding type as
+ * we were told to use by the token obtained by the user.
+ */
+ enctype = rxgk_kernel_query_challenge(challenge);
+
+ conn_krb5 = crypto_krb5_find_enctype(enctype);
+ if (!conn_krb5)
+ return -ENOPKG;
+ token_krb5 = key->payload.data[0];
+ token_key = (const struct krb5_buffer *)&key->payload.data[2];
+
+ /* struct rxgk_key {
+ * afs_uint32 enctype;
+ * opaque key<>;
+ * };
+ */
+ keysize = 4 + xdr_len_object(conn_krb5->key_len);
+
+ /* struct RXGK_AuthName {
+ * afs_int32 kind;
+ * opaque data<AUTHDATAMAX>;
+ * opaque display<AUTHPRINTABLEMAX>;
+ * };
+ */
+ uuidsize = sizeof(server->uuid);
+ authsize = 4 + xdr_len_object(uuidsize) + xdr_len_object(0);
+
+ /* struct RXGK_Token {
+ * rxgk_key K0;
+ * RXGK_Level level;
+ * rxgkTime starttime;
+ * afs_int32 lifetime;
+ * afs_int32 bytelife;
+ * rxgkTime expirationtime;
+ * struct RXGK_AuthName identities<>;
+ * };
+ */
+ toksize = keysize + 8 + 4 + 4 + 8 + xdr_len_object(authsize);
+
+ offset = 0;
+ encsize = crypto_krb5_how_much_buffer(token_krb5, KRB5_ENCRYPT_MODE, toksize, &offset);
+
+ /* struct RXGK_TokenContainer {
+ * afs_int32 kvno;
+ * afs_int32 enctype;
+ * opaque encrypted_token<>;
+ * };
+ */
+ contsize = 4 + 4 + xdr_len_object(encsize);
+
+ /* struct YFSAppData {
+ * opr_uuid initiatorUuid;
+ * opr_uuid acceptorUuid;
+ * Capabilities caps;
+ * afs_int32 enctype;
+ * opaque callbackKey<>;
+ * opaque callbackToken<>;
+ * };
+ */
+ adatasize = 16 + 16 +
+ xdr_len_object(sizeof(caps)) +
+ 4 +
+ xdr_len_object(conn_krb5->key_len) +
+ xdr_len_object(contsize);
+
+ ret = -ENOMEM;
+ appdata = kzalloc(adatasize, GFP_KERNEL);
+ if (!appdata)
+ goto out;
+ xdr = appdata;
+
+ memcpy(xdr, &net->uuid, 16); /* appdata.initiatorUuid */
+ xdr += 16 / 4;
+ memcpy(xdr, &server->uuid, 16); /* appdata.acceptorUuid */
+ xdr += 16 / 4;
+ *xdr++ = htonl(ARRAY_SIZE(caps)); /* appdata.caps.len */
+ memcpy(xdr, &caps, sizeof(caps)); /* appdata.caps */
+ xdr += ARRAY_SIZE(caps);
+ *xdr++ = htonl(conn_krb5->etype); /* appdata.enctype */
+
+ *xdr++ = htonl(conn_krb5->key_len); /* appdata.callbackKey.len */
+ K0 = xdr;
+ get_random_bytes(K0, conn_krb5->key_len); /* appdata.callbackKey.data */
+ xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+ *xdr++ = htonl(contsize); /* appdata.callbackToken.len */
+ *xdr++ = htonl(1); /* cont.kvno */
+ *xdr++ = htonl(token_krb5->etype); /* cont.enctype */
+ *xdr++ = htonl(encsize); /* cont.encrypted_token.len */
+
+ encbase = xdr;
+ xdr += offset / 4;
+ *xdr++ = htonl(conn_krb5->etype); /* token.K0.enctype */
+ *xdr++ = htonl(conn_krb5->key_len); /* token.K0.key.len */
+ memcpy(xdr, K0, conn_krb5->key_len); /* token.K0.key.data */
+ xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+ *xdr++ = htonl(RXRPC_SECURITY_ENCRYPT); /* token.level */
+ *xdr++ = htonl(0); /* token.starttime */
+ *xdr++ = htonl(0); /* " */
+ *xdr++ = htonl(0); /* token.lifetime */
+ *xdr++ = htonl(0); /* token.bytelife */
+ *xdr++ = htonl(0); /* token.expirationtime */
+ *xdr++ = htonl(0); /* " */
+ *xdr++ = htonl(1); /* token.identities.count */
+ *xdr++ = htonl(0); /* token.identities[0].kind */
+ *xdr++ = htonl(uuidsize); /* token.identities[0].data.len */
+ memcpy(xdr, &server->uuid, uuidsize);
+ xdr += xdr_round_up(uuidsize) / 4;
+ *xdr++ = htonl(0); /* token.identities[0].display.len */
+
+ xdr = encbase + xdr_round_up(encsize);
+
+ if ((unsigned long)xdr - (unsigned long)appdata != adatasize)
+ pr_err("Appdata size incorrect %lx != %zx\n",
+ (unsigned long)xdr - (unsigned long)appdata, adatasize);
+
+ aead = crypto_krb5_prepare_encryption(token_krb5, token_key, RXGK_SERVER_ENC_TOKEN,
+ GFP_KERNEL);
+ if (IS_ERR(aead)) {
+ ret = PTR_ERR(aead);
+ goto out_token;
+ }
+
+ sg_init_one(&sg, encbase, encsize);
+ ret = crypto_krb5_encrypt(token_krb5, aead, &sg, 1, encsize, offset, toksize, false);
+ if (ret < 0)
+ goto out_aead;
+
+ server->cm_rxgk_appdata.len = adatasize;
+ server->cm_rxgk_appdata.data = appdata;
+ appdata = NULL;
+
+out_aead:
+ crypto_free_aead(aead);
+out_token:
+ kfree(appdata);
+out:
+ return ret;
+}
+#endif /* CONFIG_RXGK */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index d4ddb20d6732..1a906805a9e3 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -139,48 +139,6 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
/*
- * Find the server record by peer address and record a probe to the cache
- * manager from a server.
- */
-static int afs_find_cm_server_by_peer(struct afs_call *call)
-{
- struct sockaddr_rxrpc srx;
- struct afs_server *server;
-
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
- server = afs_find_server(call->net, &srx);
- if (!server) {
- trace_afs_cm_no_server(call, &srx);
- return 0;
- }
-
- call->server = server;
- return 0;
-}
-
-/*
- * Find the server record by server UUID and record a probe to the cache
- * manager from a server.
- */
-static int afs_find_cm_server_by_uuid(struct afs_call *call,
- struct afs_uuid *uuid)
-{
- struct afs_server *server;
-
- rcu_read_lock();
- server = afs_find_server_by_uuid(call->net, call->request);
- rcu_read_unlock();
- if (!server) {
- trace_afs_cm_no_server_u(call, call->request);
- return 0;
- }
-
- call->server = server;
- return 0;
-}
-
-/*
* Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
@@ -321,10 +279,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- /* we'll need the file server record as that tells us which set of
- * vnodes to operate upon */
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -348,18 +303,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
*/
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
- int ret;
-
_enter("");
afs_extract_discard(call, 0);
- ret = afs_extract_data(call, false);
- if (ret < 0)
- return ret;
-
- /* we'll need the file server record as that tells us which set of
- * vnodes to operate upon */
- return afs_find_cm_server_by_peer(call);
+ return afs_extract_data(call, false);
}
/*
@@ -372,8 +319,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
__be32 *b;
int ret;
- _enter("");
-
_enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
@@ -420,9 +365,13 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- /* we'll need the file server record as that tells us which set of
- * vnodes to operate upon */
- return afs_find_cm_server_by_uuid(call, call->request);
+ if (memcmp(call->request, &call->server->_uuid, sizeof(call->server->_uuid)) != 0) {
+ pr_notice("Callback UUID does not match fileserver UUID\n");
+ trace_afs_cm_no_server_u(call, call->request);
+ return 0;
+ }
+
+ return 0;
}
/*
@@ -454,7 +403,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -532,7 +481,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -592,7 +541,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -666,9 +615,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- /* We'll need the file server record as that tells us which set of
- * vnodes to operate upon.
- */
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b7c1f8c84b38..f4e9e12373ac 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -12,6 +12,8 @@
#include <linux/swap.h>
#include <linux/ctype.h>
#include <linux/sched.h>
+#include <linux/iversion.h>
+#include <linux/iov_iter.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
#include "afs_fs.h"
@@ -21,35 +23,27 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
static int afs_dir_open(struct inode *inode, struct file *file);
static int afs_readdir(struct file *file, struct dir_context *ctx);
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
+static int afs_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags);
static int afs_d_delete(const struct dentry *dentry);
static void afs_d_iput(struct dentry *dentry, struct inode *inode);
static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl);
-static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode);
+static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode);
static int afs_rmdir(struct inode *dir, struct dentry *dentry);
static int afs_unlink(struct inode *dir, struct dentry *dentry);
static int afs_link(struct dentry *from, struct inode *dir,
struct dentry *dentry);
-static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *content);
-static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
-static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags);
-static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- size_t length);
-
-static bool afs_dir_dirty_folio(struct address_space *mapping,
- struct folio *folio)
-{
- BUG(); /* This should never happen. */
-}
const struct file_operations afs_dir_file_operations = {
.open = afs_dir_open,
@@ -74,10 +68,7 @@ const struct inode_operations afs_dir_inode_operations = {
};
const struct address_space_operations afs_dir_aops = {
- .dirty_folio = afs_dir_dirty_folio,
- .release_folio = afs_dir_release_folio,
- .invalidate_folio = afs_dir_invalidate_folio,
- .migrate_folio = filemap_migrate_folio,
+ .writepages = afs_single_writepages,
};
const struct dentry_operations afs_fs_dentry_operations = {
@@ -98,152 +89,124 @@ struct afs_lookup_one_cookie {
struct afs_lookup_cookie {
struct dir_context ctx;
struct qstr name;
- bool found;
- bool one_only;
unsigned short nr_fids;
struct afs_fid fids[50];
};
+static void afs_dir_unuse_cookie(struct afs_vnode *dvnode, int ret)
+{
+ if (ret == 0) {
+ struct afs_vnode_cache_aux aux;
+ loff_t i_size = i_size_read(&dvnode->netfs.inode);
+
+ afs_set_cache_aux(dvnode, &aux);
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), &aux, &i_size);
+ } else {
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ }
+}
+
/*
- * Drop the refs that we're holding on the folios we were reading into. We've
- * got refs on the first nr_pages pages.
+ * Iterate through a kmapped directory segment, dumping a summary of
+ * the contents.
*/
-static void afs_dir_read_cleanup(struct afs_read *req)
+static size_t afs_dir_dump_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- struct address_space *mapping = req->vnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
+ do {
+ union afs_xdr_dir_block *block = iter_base;
- XA_STATE(xas, &mapping->i_pages, 0);
+ pr_warn("[%05zx] %32phN\n", progress, block);
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ progress += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
- if (unlikely(!req->nr_pages))
- return;
+ return len;
+}
- rcu_read_lock();
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
- BUG_ON(xa_is_value(folio));
- ASSERTCMP(folio_file_mapping(folio), ==, mapping);
+/*
+ * Dump the contents of a directory.
+ */
+static void afs_dir_dump(struct afs_vnode *dvnode)
+{
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
- folio_put(folio);
- }
+ pr_warn("DIR %llx:%llx is=%llx\n",
+ dvnode->fid.vid, dvnode->fid.vnode, i_size);
- rcu_read_unlock();
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ iterate_folioq(&iter, iov_iter_count(&iter), NULL, NULL,
+ afs_dir_dump_step);
}
/*
* check that a directory folio is valid
*/
-static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
- loff_t i_size)
+static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress,
+ union afs_xdr_dir_block *block)
{
- union afs_xdr_dir_block *block;
- size_t offset, size;
- loff_t pos;
+ if (block->hdr.magic != AFS_DIR_MAGIC) {
+ pr_warn("%s(%lx): [%zx] bad magic %04x\n",
+ __func__, dvnode->netfs.inode.i_ino,
+ progress, ntohs(block->hdr.magic));
+ trace_afs_dir_check_failed(dvnode, progress);
+ trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
+ return false;
+ }
- /* Determine how many magic numbers there should be in this folio, but
- * we must take care because the directory may change size under us.
+ /* Make sure each block is NUL terminated so we can reasonably
+ * use string functions on it. The filenames in the folio
+ * *should* be NUL-terminated anyway.
*/
- pos = folio_pos(folio);
- if (i_size <= pos)
- goto checked;
-
- size = min_t(loff_t, folio_size(folio), i_size - pos);
- for (offset = 0; offset < size; offset += sizeof(*block)) {
- block = kmap_local_folio(folio, offset);
- if (block->hdr.magic != AFS_DIR_MAGIC) {
- printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
- __func__, dvnode->netfs.inode.i_ino,
- pos, offset, size, ntohs(block->hdr.magic));
- trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
- kunmap_local(block);
- trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
- goto error;
- }
-
- /* Make sure each block is NUL terminated so we can reasonably
- * use string functions on it. The filenames in the folio
- * *should* be NUL-terminated anyway.
- */
- ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
-
- kunmap_local(block);
- }
-checked:
+ ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
afs_stat_v(dvnode, n_read_dir);
return true;
-
-error:
- return false;
}
/*
- * Dump the contents of a directory.
+ * Iterate through a kmapped directory segment, checking the content.
*/
-static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
+static size_t afs_dir_check_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- union afs_xdr_dir_block *block;
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
- size_t offset, size;
-
- XA_STATE(xas, &mapping->i_pages, 0);
-
- pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
- dvnode->fid.vid, dvnode->fid.vnode,
- req->file_size, req->len, req->actual_len);
- pr_warn("DIR %llx %x %zx %zx\n",
- req->pos, req->nr_pages,
- req->iter->iov_offset, iov_iter_count(req->iter));
-
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
+ struct afs_vnode *dvnode = priv;
- BUG_ON(folio_file_mapping(folio) != mapping);
+ if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ len % AFS_DIR_BLOCK_SIZE))
+ return len;
- size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
- for (offset = 0; offset < size; offset += sizeof(*block)) {
- block = kmap_local_folio(folio, offset);
- pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
- kunmap_local(block);
- }
- }
+ do {
+ if (!afs_dir_check_block(dvnode, progress, iter_base))
+ break;
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
+
+ return len;
}
/*
- * Check all the blocks in a directory. All the folios are held pinned.
+ * Check all the blocks in a directory.
*/
-static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+static int afs_dir_check(struct afs_vnode *dvnode)
{
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
- int ret = 0;
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
+ size_t checked = 0;
- XA_STATE(xas, &mapping->i_pages, 0);
-
- if (unlikely(!req->nr_pages))
+ if (unlikely(!i_size))
return 0;
- rcu_read_lock();
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
-
- BUG_ON(folio_file_mapping(folio) != mapping);
-
- if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
- afs_dir_dump(dvnode, req);
- ret = -EIO;
- break;
- }
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ checked = iterate_folioq(&iter, iov_iter_count(&iter), dvnode, NULL,
+ afs_dir_check_step);
+ if (checked != i_size) {
+ afs_dir_dump(dvnode);
+ return -EIO;
}
-
- rcu_read_unlock();
- return ret;
+ return 0;
}
/*
@@ -263,130 +226,140 @@ static int afs_dir_open(struct inode *inode, struct file *file)
}
/*
- * Read the directory into the pagecache in one go, scrubbing the previous
- * contents. The list of folios is returned, pinning them so that they don't
- * get reclaimed during the iteration.
+ * Read a file in a single download.
*/
-static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
- __acquires(&dvnode->validate_lock)
+static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
{
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct afs_read *req;
+ struct iov_iter iter;
+ ssize_t ret;
loff_t i_size;
- int nr_pages, i;
- int ret;
-
- _enter("");
-
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- return ERR_PTR(-ENOMEM);
+ bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
- refcount_set(&req->usage, 1);
- req->vnode = dvnode;
- req->key = key_get(key);
- req->cleanup = afs_dir_read_cleanup;
-
-expand:
i_size = i_size_read(&dvnode->netfs.inode);
- if (i_size < 2048) {
- ret = afs_bad(dvnode, afs_file_error_dir_small);
- goto error;
- }
- if (i_size > 2048 * 1024) {
- trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- ret = -EFBIG;
- goto error;
+ if (is_dir) {
+ if (i_size < AFS_DIR_BLOCK_SIZE)
+ return afs_bad(dvnode, afs_file_error_dir_small);
+ if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
+ } else {
+ if (i_size > AFSPATHMAX) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
}
- _enter("%llu", i_size);
+ /* Expand the storage. TODO: Shrink the storage too. */
+ if (dvnode->directory_size < i_size) {
+ size_t cur_size = dvnode->directory_size;
- nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
+ ret = netfs_alloc_folioq_buffer(NULL,
+ &dvnode->directory, &cur_size, i_size,
+ mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ dvnode->directory_size = cur_size;
+ if (ret < 0)
+ return ret;
+ }
- req->actual_len = i_size; /* May change */
- req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
- req->data_version = dvnode->status.data_version; /* May change */
- iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages,
- 0, i_size);
- req->iter = &req->def_iter;
+ iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, dvnode->directory_size);
- /* Fill in any gaps that we might find where the memory reclaimer has
- * been at work and pin all the folios. If there are any gaps, we will
- * need to reread the entire directory contents.
+ /* AFS requires us to perform the read of a directory synchronously as
+ * a single unit to avoid issues with the directory contents being
+ * changed between reads.
*/
- i = req->nr_pages;
- while (i < nr_pages) {
- struct folio *folio;
-
- folio = filemap_get_folio(mapping, i);
- if (!folio) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_inval);
-
- ret = -ENOMEM;
- folio = __filemap_get_folio(mapping,
- i, FGP_LOCK | FGP_CREAT,
- mapping->gfp_mask);
- if (!folio)
- goto error;
- folio_attach_private(folio, (void *)1);
- folio_unlock(folio);
+ ret = netfs_read_single(&dvnode->netfs.inode, file, &iter);
+ if (ret >= 0) {
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size > ret) {
+ /* The content has grown, so we need to expand the
+ * buffer.
+ */
+ ret = -ESTALE;
+ } else if (is_dir) {
+ int ret2 = afs_dir_check(dvnode);
+
+ if (ret2 < 0)
+ ret = ret2;
+ } else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
+ /* NUL-terminate a symlink. */
+ char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
+
+ symlink[i_size] = 0;
+ kunmap_local(symlink);
}
-
- req->nr_pages += folio_nr_pages(folio);
- i += folio_nr_pages(folio);
}
- /* If we're going to reload, we need to lock all the pages to prevent
- * races.
- */
+ return ret;
+}
+
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
+{
+ ssize_t ret;
+
+ fscache_use_cookie(afs_vnode_cache(dvnode), false);
+ ret = afs_do_read_single(dvnode, file);
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ return ret;
+}
+
+/*
+ * Read the directory into a folio_queue buffer in one go, scrubbing the
+ * previous contents. We return -ESTALE if the caller needs to call us again.
+ */
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ __acquires(&dvnode->validate_lock)
+{
+ ssize_t ret;
+ loff_t i_size;
+
+ i_size = i_size_read(&dvnode->netfs.inode);
+
ret = -ERESTARTSYS;
if (down_read_killable(&dvnode->validate_lock) < 0)
goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ /* We only need to reread the data if it became invalid - or if we
+ * haven't read it yet.
+ */
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ ret = i_size;
+ goto valid;
+ }
up_read(&dvnode->validate_lock);
if (down_write_killable(&dvnode->validate_lock) < 0)
goto error;
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- trace_afs_reload_dir(dvnode);
- ret = afs_fetch_data(dvnode, req);
- if (ret < 0)
- goto error_unlock;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_invalidate_cache(dvnode, 0);
- task_io_account_read(PAGE_SIZE * req->nr_pages);
-
- if (req->len < req->file_size) {
- /* The content has grown, so we need to expand the
- * buffer.
- */
- up_write(&dvnode->validate_lock);
- goto expand;
- }
-
- /* Validate the data we just read. */
- ret = afs_dir_check(dvnode, req);
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) ||
+ !test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ trace_afs_reload_dir(dvnode);
+ ret = afs_read_single(dvnode, file);
if (ret < 0)
goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+ } else {
+ ret = i_size;
}
downgrade_write(&dvnode->validate_lock);
-success:
- return req;
+valid:
+ return ret;
error_unlock:
up_write(&dvnode->validate_lock);
error:
- afs_put_read(req);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
+ _leave(" = %zd", ret);
+ return ret;
}
/*
@@ -394,79 +367,69 @@ error:
*/
static int afs_dir_iterate_block(struct afs_vnode *dvnode,
struct dir_context *ctx,
- union afs_xdr_dir_block *block,
- unsigned blkoff)
+ union afs_xdr_dir_block *block)
{
union afs_xdr_dirent *dire;
- unsigned offset, next, curr, nr_slots;
+ unsigned int blknum, base, hdr, pos, next, nr_slots;
size_t nlen;
int tmp;
- _enter("%llx,%x", ctx->pos, blkoff);
+ blknum = ctx->pos / AFS_DIR_BLOCK_SIZE;
+ base = blknum * AFS_DIR_SLOTS_PER_BLOCK;
+ hdr = (blknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+ pos = DIV_ROUND_UP(ctx->pos, AFS_DIR_DIRENT_SIZE) - base;
- curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
+ _enter("%llx,%x", ctx->pos, blknum);
/* walk through the block, an entry at a time */
- for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
- offset < AFS_DIR_SLOTS_PER_BLOCK;
- offset = next
- ) {
+ for (unsigned int slot = hdr; slot < AFS_DIR_SLOTS_PER_BLOCK; slot = next) {
/* skip entries marked unused in the bitmap */
- if (!(block->hdr.bitmap[offset / 8] &
- (1 << (offset % 8)))) {
- _debug("ENT[%zu.%u]: unused",
- blkoff / sizeof(union afs_xdr_dir_block), offset);
- next = offset + 1;
- if (offset >= curr)
- ctx->pos = blkoff +
- next * sizeof(union afs_xdr_dirent);
+ if (!(block->hdr.bitmap[slot / 8] &
+ (1 << (slot % 8)))) {
+ _debug("ENT[%x]: Unused", base + slot);
+ next = slot + 1;
+ if (next >= pos)
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
continue;
}
/* got a valid entry */
- dire = &block->dirents[offset];
+ dire = &block->dirents[slot];
nlen = strnlen(dire->u.name,
- sizeof(*block) -
- offset * sizeof(union afs_xdr_dirent));
+ (unsigned long)(block + 1) - (unsigned long)dire->u.name - 1);
if (nlen > AFSNAMEMAX - 1) {
- _debug("ENT[%zu]: name too long (len %u/%zu)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, nlen);
+ _debug("ENT[%x]: Name too long (len %zx)",
+ base + slot, nlen);
return afs_bad(dvnode, afs_file_error_dir_name_too_long);
}
- _debug("ENT[%zu.%u]: %s %zu \"%s\"",
- blkoff / sizeof(union afs_xdr_dir_block), offset,
- (offset < curr ? "skip" : "fill"),
+ _debug("ENT[%x]: %s %zx \"%s\"",
+ base + slot, (slot < pos ? "skip" : "fill"),
nlen, dire->u.name);
nr_slots = afs_dir_calc_slots(nlen);
- next = offset + nr_slots;
+ next = slot + nr_slots;
if (next > AFS_DIR_SLOTS_PER_BLOCK) {
- _debug("ENT[%zu.%u]:"
- " %u extends beyond end dir block"
- " (len %zu)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, next, nlen);
+ _debug("ENT[%x]: extends beyond end dir block (len %zx)",
+ base + slot, nlen);
return afs_bad(dvnode, afs_file_error_dir_over_end);
}
/* Check that the name-extension dirents are all allocated */
for (tmp = 1; tmp < nr_slots; tmp++) {
- unsigned int ix = offset + tmp;
- if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) {
- _debug("ENT[%zu.u]:"
- " %u unmarked extension (%u/%u)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, tmp, nr_slots);
+ unsigned int xslot = slot + tmp;
+
+ if (!(block->hdr.bitmap[xslot / 8] & (1 << (xslot % 8)))) {
+ _debug("ENT[%x]: Unmarked extension (%x/%x)",
+ base + slot, tmp, nr_slots);
return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
}
}
/* skip if starts before the current position */
- if (offset < curr) {
- if (next > curr)
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ if (slot < pos) {
+ if (next > pos)
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
continue;
}
@@ -480,75 +443,110 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
return 0;
}
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
}
_leave(" = 1 [more]");
return 1;
}
+struct afs_dir_iteration_ctx {
+ struct dir_context *dir_ctx;
+ int error;
+};
+
/*
- * iterate through the data blob that lists the contents of an AFS directory
+ * Iterate through a kmapped directory segment.
*/
-static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
- struct key *key, afs_dataversion_t *_dir_version)
+static size_t afs_dir_iterate_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- struct afs_vnode *dvnode = AFS_FS_I(dir);
- union afs_xdr_dir_block *dblock;
- struct afs_read *req;
- struct folio *folio;
- unsigned offset, size;
+ struct afs_dir_iteration_ctx *ctx = priv2;
+ struct afs_vnode *dvnode = priv;
int ret;
- _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
-
- if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
- _leave(" = -ESTALE");
- return -ESTALE;
+ if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ len % AFS_DIR_BLOCK_SIZE)) {
+ pr_err("Mis-iteration prog=%zx len=%zx\n",
+ progress % AFS_DIR_BLOCK_SIZE,
+ len % AFS_DIR_BLOCK_SIZE);
+ return len;
}
- req = afs_read_dir(dvnode, key);
- if (IS_ERR(req))
- return PTR_ERR(req);
- *_dir_version = req->data_version;
+ do {
+ ret = afs_dir_iterate_block(dvnode, ctx->dir_ctx, iter_base);
+ if (ret != 1)
+ break;
- /* round the file position up to the next entry boundary */
- ctx->pos += sizeof(union afs_xdr_dirent) - 1;
- ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1);
+ ctx->dir_ctx->pos = round_up(ctx->dir_ctx->pos, AFS_DIR_BLOCK_SIZE);
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
- /* walk through the blocks in sequence */
- ret = 0;
- while (ctx->pos < req->actual_len) {
- /* Fetch the appropriate folio from the directory and re-add it
- * to the LRU. We have all the pages pinned with an extra ref.
- */
- folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
- FGP_ACCESSED, 0);
- if (!folio) {
- ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
- break;
- }
+ return len;
+}
- offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
- size = min_t(loff_t, folio_size(folio),
- req->actual_len - folio_file_pos(folio));
+/*
+ * Iterate through the directory folios.
+ */
+static int afs_dir_iterate_contents(struct inode *dir, struct dir_context *dir_ctx)
+{
+ struct afs_dir_iteration_ctx ctx = { .dir_ctx = dir_ctx };
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(dir);
- do {
- dblock = kmap_local_folio(folio, offset);
- ret = afs_dir_iterate_block(dvnode, ctx, dblock,
- folio_file_pos(folio) + offset);
- kunmap_local(dblock);
- if (ret != 1)
- goto out;
+ /* Round the file position up to the next entry boundary */
+ dir_ctx->pos = round_up(dir_ctx->pos, sizeof(union afs_xdr_dirent));
- } while (offset += sizeof(*dblock), offset < size);
+ if (i_size <= 0 || dir_ctx->pos >= i_size)
+ return 0;
- ret = 0;
- }
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ iov_iter_advance(&iter, round_down(dir_ctx->pos, AFS_DIR_BLOCK_SIZE));
+
+ iterate_folioq(&iter, iov_iter_count(&iter), dvnode, &ctx,
+ afs_dir_iterate_step);
+
+ if (ctx.error == -ESTALE)
+ afs_invalidate_dir(dvnode, afs_dir_invalid_iter_stale);
+ return ctx.error;
+}
+
+/*
+ * iterate through the data blob that lists the contents of an AFS directory
+ */
+static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
+ struct file *file, afs_dataversion_t *_dir_version)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int retry_limit = 100;
+ int ret;
+
+ _enter("{%lu},%llx,,", dir->i_ino, ctx->pos);
+
+ do {
+ if (--retry_limit < 0) {
+ pr_warn("afs_read_dir(): Too many retries\n");
+ ret = -ESTALE;
+ break;
+ }
+ ret = afs_read_dir(dvnode, file);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ break;
+ if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+ ret = -ESTALE;
+ break;
+ }
+ continue;
+ }
+ *_dir_version = inode_peek_iversion_raw(dir);
+
+ ret = afs_dir_iterate_contents(dir, ctx);
+ up_read(&dvnode->validate_lock);
+ } while (ret == -ESTALE);
-out:
- up_read(&dvnode->validate_lock);
- afs_put_read(req);
_leave(" = %d", ret);
return ret;
}
@@ -560,8 +558,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
{
afs_dataversion_t dir_version;
- return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
- &dir_version);
+ return afs_dir_iterate(file_inode(file), ctx, file, &dir_version);
}
/*
@@ -601,22 +598,22 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
* Do a lookup of a single name in a directory
* - just returns the FID the dentry name maps to if found
*/
-static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
- struct afs_fid *fid, struct key *key,
+static int afs_do_lookup_one(struct inode *dir, const struct qstr *name,
+ struct afs_fid *fid,
afs_dataversion_t *_dir_version)
{
struct afs_super_info *as = dir->i_sb->s_fs_info;
struct afs_lookup_one_cookie cookie = {
.ctx.actor = afs_lookup_one_filldir,
- .name = dentry->d_name,
+ .name = *name,
.fid.vid = as->volume->vid
};
int ret;
- _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+ _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name);
/* search the directory */
- ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
+ ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
if (ret < 0) {
_leave(" = %d [iter]", ret);
return ret;
@@ -651,19 +648,10 @@ static bool afs_lookup_filldir(struct dir_context *ctx, const char *name,
BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
- if (cookie->found) {
- if (cookie->nr_fids < 50) {
- cookie->fids[cookie->nr_fids].vnode = ino;
- cookie->fids[cookie->nr_fids].unique = dtype;
- cookie->nr_fids++;
- }
- } else if (cookie->name.len == nlen &&
- memcmp(cookie->name.name, name, nlen) == 0) {
- cookie->fids[1].vnode = ino;
- cookie->fids[1].unique = dtype;
- cookie->found = 1;
- if (cookie->one_only)
- return false;
+ if (cookie->nr_fids < 50) {
+ cookie->fids[cookie->nr_fids].vnode = ino;
+ cookie->fids[cookie->nr_fids].unique = dtype;
+ cookie->nr_fids++;
}
return cookie->nr_fids < 50;
@@ -689,8 +677,9 @@ static void afs_do_lookup_success(struct afs_operation *op)
vp = &op->file[0];
abort_code = vp->scb.status.abort_code;
if (abort_code != 0) {
- op->ac.abort_code = abort_code;
- op->error = afs_abort_to_error(abort_code);
+ op->call_abort_code = abort_code;
+ afs_op_set_error(op, afs_abort_to_error(abort_code));
+ op->cumul_error.abort_code = abort_code;
}
break;
@@ -703,6 +692,8 @@ static void afs_do_lookup_success(struct afs_operation *op)
break;
}
+ if (vp->scb.status.abort_code)
+ trace_afs_bulkstat_error(op, &vp->fid, i, vp->scb.status.abort_code);
if (!vp->scb.have_status && !vp->scb.have_error)
continue;
@@ -780,8 +771,7 @@ static bool afs_server_supports_ibulk(struct afs_vnode *dvnode)
* files in one go and create inodes for them. The inode of the file we were
* asked for is returned.
*/
-static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
- struct key *key)
+static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
{
struct afs_lookup_cookie *cookie;
struct afs_vnode_param *vp;
@@ -789,6 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct inode *inode = NULL, *ti;
afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
+ bool supports_ibulk, isnew;
long ret;
int i;
@@ -802,22 +793,22 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->fids[i].vid = dvnode->fid.vid;
cookie->ctx.actor = afs_lookup_filldir;
cookie->name = dentry->d_name;
- cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want
- * and slot 1 for the directory */
+ cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want
+ * and slot 0 for the directory */
- if (!afs_server_supports_ibulk(dvnode))
- cookie->one_only = true;
-
- /* search the directory */
- ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
+ /* Search the directory for the named entry using the hash table... */
+ ret = afs_dir_search(dvnode, &dentry->d_name, &cookie->fids[1], &data_version);
if (ret < 0)
goto out;
- dentry->d_fsdata = (void *)(unsigned long)data_version;
+ supports_ibulk = afs_server_supports_ibulk(dvnode);
+ if (supports_ibulk) {
+ /* ...then scan linearly from that point for entries to lookup-ahead. */
+ cookie->ctx.pos = (ret + 1) * AFS_DIR_DIRENT_SIZE;
+ afs_dir_iterate(dir, &cookie->ctx, NULL, &data_version);
+ }
- ret = -ENOENT;
- if (!cookie->found)
- goto out;
+ dentry->d_fsdata = (void *)(unsigned long)data_version;
/* Check to see if we already have an inode for the primary fid. */
inode = ilookup5(dir->i_sb, cookie->fids[1].vnode,
@@ -842,13 +833,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
_debug("nr_files %u", op->nr_files);
/* Need space for examining all the selected files */
- op->error = -ENOMEM;
if (op->nr_files > 2) {
op->more_files = kvcalloc(op->nr_files - 2,
sizeof(struct afs_vnode_param),
GFP_KERNEL);
- if (!op->more_files)
+ if (!op->more_files) {
+ afs_op_nomem(op);
goto out_op;
+ }
for (i = 2; i < op->nr_files; i++) {
vp = &op->more_files[i - 2];
@@ -858,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
* callback counters.
*/
ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
- afs_ilookup5_test_by_fid, &vp->fid);
+ afs_ilookup5_test_by_fid, &vp->fid, &isnew);
if (!IS_ERR_OR_NULL(ti)) {
vnode = AFS_FS_I(ti);
vp->dv_before = vnode->status.data_version;
@@ -874,14 +866,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
* lookups contained therein are stored in the reply without aborting
* the whole operation.
*/
- op->error = -ENOTSUPP;
- if (!cookie->one_only) {
+ afs_op_set_error(op, -ENOTSUPP);
+ if (supports_ibulk) {
op->ops = &afs_inline_bulk_status_operation;
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
- if (op->error == -ENOTSUPP) {
+ if (afs_op_error(op) == -ENOTSUPP) {
/* We could try FS.BulkStatus next, but this aborts the entire
* op if any of the lookups fails - so, for the moment, revert
* to FS.FetchStatus for op->file[1].
@@ -891,12 +883,16 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
- inode = ERR_PTR(op->error);
out_op:
- if (op->error == 0) {
- inode = &op->file[1].vnode->netfs.inode;
- op->file[1].vnode = NULL;
+ if (!afs_op_error(op)) {
+ if (op->file[1].scb.status.abort_code) {
+ afs_op_accumulate_error(op, -ECONNABORTED,
+ op->file[1].scb.status.abort_code);
+ } else {
+ inode = &op->file[1].vnode->netfs.inode;
+ op->file[1].vnode = NULL;
+ }
}
if (op->file[0].scb.have_status)
@@ -913,8 +909,7 @@ out:
/*
* Look up an entry in a directory with @sys substitution.
*/
-static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
- struct key *key)
+static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry)
{
struct afs_sysnames *subs;
struct afs_net *net = afs_i2net(dir);
@@ -948,7 +943,7 @@ static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
}
strcpy(p, name);
- ret = lookup_one_len(buf, dentry->d_parent, len);
+ ret = lookup_noperm(&QSTR(buf), dentry->d_parent);
if (IS_ERR(ret) || d_is_positive(ret))
goto out_s;
dput(ret);
@@ -962,7 +957,6 @@ out_s:
afs_put_sysnames(subs);
kfree(buf);
out_p:
- key_put(key);
return ret;
}
@@ -976,7 +970,6 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
struct afs_fid fid = {};
struct inode *inode;
struct dentry *d;
- struct key *key;
int ret;
_enter("{%llx:%llu},%p{%pd},",
@@ -994,15 +987,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ESTALE);
}
- key = afs_request_key(dvnode->volume->cell);
- if (IS_ERR(key)) {
- _leave(" = %ld [key]", PTR_ERR(key));
- return ERR_CAST(key);
- }
-
- ret = afs_validate(dvnode, key);
+ ret = afs_validate(dvnode, NULL);
if (ret < 0) {
- key_put(key);
+ afs_dir_unuse_cookie(dvnode, ret);
_leave(" = %d [val]", ret);
return ERR_PTR(ret);
}
@@ -1012,15 +999,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
dentry->d_name.name[dentry->d_name.len - 1] == 's')
- return afs_lookup_atsys(dir, dentry, key);
+ return afs_lookup_atsys(dir, dentry);
afs_stat_v(dvnode, n_lookup);
- inode = afs_do_lookup(dir, dentry, key);
- key_put(key);
+ inode = afs_do_lookup(dir, dentry);
if (inode == ERR_PTR(-ENOENT))
- inode = afs_try_auto_mntpt(dentry, dir);
-
- if (!IS_ERR_OR_NULL(inode))
+ inode = NULL;
+ else if (!IS_ERR_OR_NULL(inode))
fid = AFS_FS_I(inode)->fid;
_debug("splice %p", dentry->d_inode);
@@ -1038,21 +1023,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
/*
* Check the validity of a dentry under RCU conditions.
*/
-static int afs_d_revalidate_rcu(struct dentry *dentry)
+static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry)
{
- struct afs_vnode *dvnode;
- struct dentry *parent;
- struct inode *dir;
long dir_version, de_version;
_enter("%p", dentry);
- /* Check the parent directory is still valid first. */
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- dvnode = AFS_FS_I(dir);
if (test_bit(AFS_VNODE_DELETED, &dvnode->flags))
return -ECHILD;
@@ -1080,11 +1056,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry)
* - NOTE! the hit can be a negative hit too, so we can't assume we have an
* inode
*/
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- struct afs_vnode *vnode, *dir;
+ struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir);
struct afs_fid fid;
- struct dentry *parent;
struct inode *inode;
struct key *key;
afs_dataversion_t dir_version, invalid_before;
@@ -1092,7 +1068,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
int ret;
if (flags & LOOKUP_RCU)
- return afs_d_revalidate_rcu(dentry);
+ return afs_d_revalidate_rcu(dir, dentry);
if (d_really_is_positive(dentry)) {
vnode = AFS_FS_I(d_inode(dentry));
@@ -1107,12 +1083,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (IS_ERR(key))
key = NULL;
- /* Hold the parent dentry so we can peer at it */
- parent = dget_parent(dentry);
- dir = AFS_FS_I(d_inode(parent));
-
/* validate the parent directory */
- afs_validate(dir, key);
+ ret = afs_validate(dir, key);
+ if (ret == -ERESTARTSYS) {
+ key_put(key);
+ return ret;
+ }
if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
_debug("%pd: parent dir deleted", dentry);
@@ -1137,7 +1113,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
afs_stat_v(dir, n_reval);
/* search the directory for this vnode */
- ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version);
+ ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version);
switch (ret) {
case 0:
/* the filename maps to something */
@@ -1181,22 +1157,19 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out_valid;
default:
- _debug("failed to iterate dir %pd: %d",
- parent, ret);
+ _debug("failed to iterate parent %pd2: %d", dentry, ret);
goto not_found;
}
out_valid:
dentry->d_fsdata = (void *)(unsigned long)dir_version;
out_valid_noupdate:
- dput(parent);
key_put(key);
_leave(" = 1 [valid]");
return 1;
not_found:
_debug("dropping dentry %pd2", dentry);
- dput(parent);
key_put(key);
_leave(" = 0 [bad]");
@@ -1251,9 +1224,10 @@ void afs_check_for_remote_deletion(struct afs_operation *op)
{
struct afs_vnode *vnode = op->file[0].vnode;
- switch (op->ac.abort_code) {
+ switch (afs_op_abort_code(op)) {
case VNOVNODE:
set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_nlink(&vnode->netfs.inode);
afs_break_callback(vnode, afs_cb_break_for_deleted);
}
}
@@ -1263,26 +1237,31 @@ void afs_check_for_remote_deletion(struct afs_operation *op)
*/
static void afs_vnode_new_inode(struct afs_operation *op)
{
+ struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode_param *vp = &op->file[1];
struct afs_vnode *vnode;
struct inode *inode;
_enter("");
- ASSERTCMP(op->error, ==, 0);
+ ASSERTCMP(afs_op_error(op), ==, 0);
inode = afs_iget(op, vp);
if (IS_ERR(inode)) {
/* ENOMEM or EINTR at a really inconvenient time - just abandon
* the new directory on the server.
*/
- op->error = PTR_ERR(inode);
+ afs_op_accumulate_error(op, PTR_ERR(inode), 0);
return;
}
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- if (!op->error)
+ if (S_ISDIR(inode->i_mode))
+ afs_mkdir_init_dir(vnode, dvp->vnode);
+ else if (S_ISLNK(inode->i_mode))
+ afs_init_new_symlink(vnode, op);
+ if (!afs_op_error(op))
afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
d_instantiate(op->dentry, inode);
}
@@ -1298,25 +1277,28 @@ static void afs_create_success(struct afs_operation *op)
static void afs_create_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode_param *vp = &op->file[1];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid,
op->create.reason);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_create_put(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- if (op->error)
+ if (afs_op_error(op))
d_drop(op->dentry);
}
@@ -1332,11 +1314,12 @@ static const struct afs_operation_ops afs_mkdir_operation = {
/*
* create a directory on an AFS filesystem
*/
-static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct afs_operation *op;
struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int ret;
_enter("{%llx:%llu},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@ -1344,9 +1327,11 @@ static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
op = afs_alloc_operation(NULL, dvnode->volume);
if (IS_ERR(op)) {
d_drop(dentry);
- return PTR_ERR(op);
+ return ERR_CAST(op);
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1354,8 +1339,11 @@ static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
op->dentry = dentry;
op->create.mode = S_IFDIR | mode;
op->create.reason = afs_edit_dir_for_mkdir;
+ op->mtime = current_time(dir);
op->ops = &afs_mkdir_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ERR_PTR(ret);
}
/*
@@ -1368,8 +1356,8 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
clear_nlink(&vnode->netfs.inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_rmdir);
+ afs_invalidate_dir(vnode, afs_dir_invalid_subdir_removed);
}
}
@@ -1383,18 +1371,21 @@ static void afs_rmdir_success(struct afs_operation *op)
static void afs_rmdir_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
afs_dir_remove_subdir(op->dentry);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_remove(dvnode, &op->dentry->d_name,
afs_edit_dir_for_rmdir);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_rmdir_put(struct afs_operation *op)
@@ -1429,6 +1420,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1452,10 +1445,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
op->file[1].vnode = vnode;
}
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+
+ /* Not all systems that can host afs servers have ENOTEMPTY. */
+ if (ret == -EEXIST)
+ ret = -ENOTEMPTY;
+out:
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
- return afs_put_operation(op);
+ ret = afs_put_operation(op);
+ goto out;
}
/*
@@ -1475,7 +1476,7 @@ static void afs_dir_remove_link(struct afs_operation *op)
struct dentry *dentry = op->dentry;
int ret;
- if (op->error != 0 ||
+ if (afs_op_error(op) ||
(op->file[1].scb.have_status && op->file[1].scb.have_error))
return;
if (d_really_is_positive(dentry))
@@ -1499,10 +1500,10 @@ static void afs_dir_remove_link(struct afs_operation *op)
ret = afs_validate(vnode, op->key);
if (ret != -ESTALE)
- op->error = ret;
+ afs_op_set_error(op, ret);
}
- _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, op->error);
+ _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, afs_op_error(op));
}
static void afs_unlink_success(struct afs_operation *op)
@@ -1518,22 +1519,25 @@ static void afs_unlink_success(struct afs_operation *op)
static void afs_unlink_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_remove(dvnode, &op->dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_unlink_put(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- if (op->unlink.need_rehash && op->error < 0 && op->error != -ENOENT)
+ if (op->unlink.need_rehash && afs_op_error(op) < 0 && afs_op_error(op) != -ENOENT)
d_rehash(op->dentry);
}
@@ -1566,6 +1570,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1574,7 +1580,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
/* Try to make sure we have a callback promise on the victim. */
ret = afs_validate(vnode, op->key);
if (ret < 0) {
- op->error = ret;
+ afs_op_set_error(op, ret);
goto error;
}
@@ -1583,7 +1589,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
- op->error = afs_sillyrename(dvnode, vnode, dentry, op->key);
+ afs_op_set_error(op, afs_sillyrename(dvnode, vnode, dentry, op->key));
goto error;
}
if (!d_unhashed(dentry)) {
@@ -1604,7 +1610,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
/* If there was a conflict with a third party, check the status of the
* unlinked vnode.
*/
- if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ if (afs_op_error(op) == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
op->file[1].update_ctime = false;
op->fetch_status.which = 1;
op->ops = &afs_fetch_status_operation;
@@ -1612,10 +1618,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
afs_wait_for_operation(op);
}
- return afs_put_operation(op);
-
error:
- return afs_put_operation(op);
+ ret = afs_put_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
}
static const struct afs_operation_ops afs_create_operation = {
@@ -1630,7 +1636,7 @@ static const struct afs_operation_ops afs_create_operation = {
/*
* create a regular file on an AFS filesystem
*/
-static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct afs_operation *op;
@@ -1649,6 +1655,8 @@ static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1657,8 +1665,11 @@ static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
op->dentry = dentry;
op->create.mode = S_IFREG | mode;
op->create.reason = afs_edit_dir_for_create;
+ op->mtime = current_time(dir);
op->ops = &afs_create_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
d_drop(dentry);
@@ -1685,7 +1696,7 @@ static void afs_link_success(struct afs_operation *op)
static void afs_link_put(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- if (op->error)
+ if (afs_op_error(op))
d_drop(op->dentry);
}
@@ -1723,6 +1734,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
ret = afs_validate(vnode, op->key);
if (ret < 0)
goto error_op;
@@ -1738,10 +1751,13 @@ static int afs_link(struct dentry *from, struct inode *dir,
op->dentry_2 = from;
op->ops = &afs_link_operation;
op->create.reason = afs_edit_dir_for_link;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error_op:
afs_put_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
error:
d_drop(dentry);
_leave(" = %d", ret);
@@ -1760,7 +1776,7 @@ static const struct afs_operation_ops afs_symlink_operation = {
/*
* create a symlink in an AFS filesystem
*/
-static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *content)
{
struct afs_operation *op;
@@ -1785,6 +1801,8 @@ static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
@@ -1792,7 +1810,10 @@ static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
op->ops = &afs_symlink_operation;
op->create.reason = afs_edit_dir_for_symlink;
op->create.symlink = content;
- return afs_do_sync_operation(op);
+ op->mtime = current_time(dir);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
d_drop(dentry);
@@ -1802,6 +1823,9 @@ error:
static void afs_rename_success(struct afs_operation *op)
{
+ struct afs_vnode *vnode = op->more_files[0].vnode;
+ struct afs_vnode *new_vnode = op->more_files[1].vnode;
+
_enter("op=%08x", op->debug_id);
op->ctime = op->file[0].scb.status.mtime_client;
@@ -1811,10 +1835,46 @@ static void afs_rename_success(struct afs_operation *op)
op->ctime = op->file[1].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[1]);
}
+ if (op->more_files[0].scb.have_status)
+ afs_vnode_commit_status(op, &op->more_files[0]);
+ if (op->more_files[1].scb.have_status)
+ afs_vnode_commit_status(op, &op->more_files[1]);
+
+ /* If we're moving a subdir between dirs, we need to update
+ * its DV counter too as the ".." will be altered.
+ */
+ if (op->file[0].vnode != op->file[1].vnode) {
+ if (S_ISDIR(vnode->netfs.inode.i_mode)) {
+ u64 new_dv;
+
+ write_seqlock(&vnode->cb_lock);
+
+ new_dv = vnode->status.data_version + 1;
+ trace_afs_set_dv(vnode, new_dv);
+ vnode->status.data_version = new_dv;
+ inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
+
+ write_sequnlock(&vnode->cb_lock);
+ }
+
+ if ((op->rename.rename_flags & RENAME_EXCHANGE) &&
+ S_ISDIR(new_vnode->netfs.inode.i_mode)) {
+ u64 new_dv;
+
+ write_seqlock(&new_vnode->cb_lock);
+
+ new_dv = new_vnode->status.data_version + 1;
+ new_vnode->status.data_version = new_dv;
+ inode_set_iversion_raw(&new_vnode->netfs.inode, new_dv);
+
+ write_sequnlock(&new_vnode->cb_lock);
+ }
+ }
}
static void afs_rename_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources orig_cres = {}, new_cres = {};
struct afs_vnode_param *orig_dvp = &op->file[0];
struct afs_vnode_param *new_dvp = &op->file[1];
struct afs_vnode *orig_dvnode = orig_dvp->vnode;
@@ -1831,6 +1891,10 @@ static void afs_rename_edit_dir(struct afs_operation *op)
op->rename.rehash = NULL;
}
+ fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode));
+ if (new_dvnode != orig_dvnode)
+ fscache_begin_write_operation(&new_cres, afs_vnode_cache(new_dvnode));
+
down_write(&orig_dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
@@ -1852,6 +1916,12 @@ static void afs_rename_edit_dir(struct afs_operation *op)
&vnode->fid, afs_edit_dir_for_rename_2);
}
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ new_dvnode != orig_dvnode &&
+ test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ afs_edit_dir_update(vnode, &dotdot_name, new_dvnode,
+ afs_edit_dir_for_rename_sub);
+
new_inode = d_inode(new_dentry);
if (new_inode) {
spin_lock(&new_inode->i_lock);
@@ -1864,9 +1934,6 @@ static void afs_rename_edit_dir(struct afs_operation *op)
/* Now we can update d_fsdata on the dentries to reflect their
* new parent's data_version.
- *
- * Note that if we ever implement RENAME_EXCHANGE, we'll have
- * to update both dentries with opposing dir versions.
*/
afs_update_dentry_version(op, new_dvp, op->dentry);
afs_update_dentry_version(op, new_dvp, op->dentry_2);
@@ -1874,6 +1941,70 @@ static void afs_rename_edit_dir(struct afs_operation *op)
d_move(old_dentry, new_dentry);
up_write(&new_dvnode->validate_lock);
+ fscache_end_operation(&orig_cres);
+ if (new_dvnode != orig_dvnode)
+ fscache_end_operation(&new_cres);
+}
+
+static void afs_rename_exchange_edit_dir(struct afs_operation *op)
+{
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ struct afs_vnode *orig_dvnode = orig_dvp->vnode;
+ struct afs_vnode *new_dvnode = new_dvp->vnode;
+ struct afs_vnode *old_vnode = op->more_files[0].vnode;
+ struct afs_vnode *new_vnode = op->more_files[1].vnode;
+ struct dentry *old_dentry = op->dentry;
+ struct dentry *new_dentry = op->dentry_2;
+
+ _enter("op=%08x", op->debug_id);
+
+ if (new_dvnode == orig_dvnode) {
+ down_write(&orig_dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+ orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta) {
+ afs_edit_dir_update(orig_dvnode, &old_dentry->d_name,
+ new_vnode, afs_edit_dir_for_rename_0);
+ afs_edit_dir_update(orig_dvnode, &new_dentry->d_name,
+ old_vnode, afs_edit_dir_for_rename_1);
+ }
+
+ d_exchange(old_dentry, new_dentry);
+ up_write(&orig_dvnode->validate_lock);
+ } else {
+ down_write(&orig_dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+ orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
+ afs_edit_dir_update(orig_dvnode, &old_dentry->d_name,
+ new_vnode, afs_edit_dir_for_rename_0);
+
+ up_write(&orig_dvnode->validate_lock);
+ down_write(&new_dvnode->validate_lock);
+
+ if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
+ new_dvnode->status.data_version == new_dvp->dv_before + new_dvp->dv_delta)
+ afs_edit_dir_update(new_dvnode, &new_dentry->d_name,
+ old_vnode, afs_edit_dir_for_rename_1);
+
+ if (S_ISDIR(old_vnode->netfs.inode.i_mode) &&
+ test_bit(AFS_VNODE_DIR_VALID, &old_vnode->flags))
+ afs_edit_dir_update(old_vnode, &dotdot_name, new_dvnode,
+ afs_edit_dir_for_rename_sub);
+
+ if (S_ISDIR(new_vnode->netfs.inode.i_mode) &&
+ test_bit(AFS_VNODE_DIR_VALID, &new_vnode->flags))
+ afs_edit_dir_update(new_vnode, &dotdot_name, orig_dvnode,
+ afs_edit_dir_for_rename_sub);
+
+ /* Now we can update d_fsdata on the dentries to reflect their
+ * new parents' data_version.
+ */
+ afs_update_dentry_version(op, new_dvp, old_dentry);
+ afs_update_dentry_version(op, orig_dvp, new_dentry);
+
+ d_exchange(old_dentry, new_dentry);
+ up_write(&new_dvnode->validate_lock);
+ }
}
static void afs_rename_put(struct afs_operation *op)
@@ -1882,7 +2013,7 @@ static void afs_rename_put(struct afs_operation *op)
if (op->rename.rehash)
d_rehash(op->rename.rehash);
dput(op->rename.tmp);
- if (op->error)
+ if (afs_op_error(op))
d_rehash(op->dentry);
}
@@ -1894,18 +2025,44 @@ static const struct afs_operation_ops afs_rename_operation = {
.put = afs_rename_put,
};
+#if 0 /* Autoswitched in yfs_fs_rename_replace(). */
+static const struct afs_operation_ops afs_rename_replace_operation = {
+ .issue_afs_rpc = NULL,
+ .issue_yfs_rpc = yfs_fs_rename_replace,
+ .success = afs_rename_success,
+ .edit_dir = afs_rename_edit_dir,
+ .put = afs_rename_put,
+};
+#endif
+
+static const struct afs_operation_ops afs_rename_noreplace_operation = {
+ .issue_afs_rpc = NULL,
+ .issue_yfs_rpc = yfs_fs_rename_noreplace,
+ .success = afs_rename_success,
+ .edit_dir = afs_rename_edit_dir,
+ .put = afs_rename_put,
+};
+
+static const struct afs_operation_ops afs_rename_exchange_operation = {
+ .issue_afs_rpc = NULL,
+ .issue_yfs_rpc = yfs_fs_rename_exchange,
+ .success = afs_rename_success,
+ .edit_dir = afs_rename_exchange_edit_dir,
+ .put = afs_rename_put,
+};
+
/*
* rename a file in an AFS filesystem and/or move it between directories
*/
-static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
struct afs_operation *op;
- struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+ struct afs_vnode *orig_dvnode, *new_dvnode, *vnode, *new_vnode = NULL;
int ret;
- if (flags)
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
return -EINVAL;
/* Don't allow silly-rename files be moved around. */
@@ -1915,6 +2072,8 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
vnode = AFS_FS_I(d_inode(old_dentry));
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
+ if (d_is_positive(new_dentry))
+ new_vnode = AFS_FS_I(d_inode(new_dentry));
_enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
@@ -1926,11 +2085,20 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(orig_dvnode), true);
+ if (new_dvnode != orig_dvnode)
+ fscache_use_cookie(afs_vnode_cache(new_dvnode), true);
+
ret = afs_validate(vnode, op->key);
- op->error = ret;
+ afs_op_set_error(op, ret);
if (ret < 0)
goto error;
+ ret = -ENOMEM;
+ op->more_files = kvcalloc(2, sizeof(struct afs_vnode_param), GFP_KERNEL);
+ if (!op->more_files)
+ goto error;
+
afs_op_set_vnode(op, 0, orig_dvnode);
afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
op->file[0].dv_delta = 1;
@@ -1939,46 +2107,63 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
op->file[1].modification = true;
op->file[0].update_ctime = true;
op->file[1].update_ctime = true;
+ op->more_files[0].vnode = vnode;
+ op->more_files[0].speculative = true;
+ op->more_files[1].vnode = new_vnode;
+ op->more_files[1].speculative = true;
+ op->nr_files = 4;
op->dentry = old_dentry;
op->dentry_2 = new_dentry;
+ op->rename.rename_flags = flags;
op->rename.new_negative = d_is_negative(new_dentry);
- op->ops = &afs_rename_operation;
- /* For non-directories, check whether the target is busy and if so,
- * make a copy of the dentry and then do a silly-rename. If the
- * silly-rename succeeds, the copied dentry is hashed and becomes the
- * new target.
- */
- if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
- /* To prevent any new references to the target during the
- * rename, we unhash the dentry in advance.
+ if (flags & RENAME_NOREPLACE) {
+ op->ops = &afs_rename_noreplace_operation;
+ } else if (flags & RENAME_EXCHANGE) {
+ op->ops = &afs_rename_exchange_operation;
+ d_drop(new_dentry);
+ } else {
+ /* If we might displace the target, we might need to do silly
+ * rename.
*/
- if (!d_unhashed(new_dentry)) {
- d_drop(new_dentry);
- op->rename.rehash = new_dentry;
- }
+ op->ops = &afs_rename_operation;
- if (d_count(new_dentry) > 2) {
- /* copy the target dentry's name */
- op->rename.tmp = d_alloc(new_dentry->d_parent,
- &new_dentry->d_name);
- if (!op->rename.tmp) {
- op->error = -ENOMEM;
- goto error;
+ /* For non-directories, check whether the target is busy and if
+ * so, make a copy of the dentry and then do a silly-rename.
+ * If the silly-rename succeeds, the copied dentry is hashed
+ * and becomes the new target.
+ */
+ if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+ /* To prevent any new references to the target during
+ * the rename, we unhash the dentry in advance.
+ */
+ if (!d_unhashed(new_dentry)) {
+ d_drop(new_dentry);
+ op->rename.rehash = new_dentry;
}
- ret = afs_sillyrename(new_dvnode,
- AFS_FS_I(d_inode(new_dentry)),
- new_dentry, op->key);
- if (ret) {
- op->error = ret;
- goto error;
+ if (d_count(new_dentry) > 2) {
+ /* copy the target dentry's name */
+ op->rename.tmp = d_alloc(new_dentry->d_parent,
+ &new_dentry->d_name);
+ if (!op->rename.tmp) {
+ afs_op_nomem(op);
+ goto error;
+ }
+
+ ret = afs_sillyrename(new_dvnode,
+ AFS_FS_I(d_inode(new_dentry)),
+ new_dentry, op->key);
+ if (ret) {
+ afs_op_set_error(op, ret);
+ goto error;
+ }
+
+ op->dentry_2 = op->rename.tmp;
+ op->rename.rehash = NULL;
+ op->rename.new_negative = true;
}
-
- op->dentry_2 = op->rename.tmp;
- op->rename.rehash = NULL;
- op->rename.new_negative = true;
}
}
@@ -1993,47 +2178,45 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
*/
d_drop(old_dentry);
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ if (ret == -ENOTSUPP)
+ ret = -EINVAL;
+out:
+ afs_dir_unuse_cookie(orig_dvnode, ret);
+ if (new_dvnode != orig_dvnode)
+ afs_dir_unuse_cookie(new_dvnode, ret);
+ return ret;
error:
- return afs_put_operation(op);
-}
-
-/*
- * Release a directory folio and clean up its private state if it's not busy
- * - return true if the folio can now be released, false if not
- */
-static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
-{
- struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
-
- folio_detach_private(folio);
-
- /* The directory will need reloading. */
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_relpg);
- return true;
+ ret = afs_put_operation(op);
+ goto out;
}
/*
- * Invalidate part or all of a folio.
+ * Write the file contents to the cache as a single blob.
*/
-static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- size_t length)
+int afs_single_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{%lu},%zu,%zu", folio->index, offset, length);
-
- BUG_ON(!folio_test_locked(folio));
+ struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
+ int ret = 0;
- /* The directory will need reloading. */
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_inval);
+ /* Need to lock to prevent the folio queue and folios from being thrown
+ * away.
+ */
+ down_read(&dvnode->validate_lock);
+
+ if (is_dir ?
+ test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
+ atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
+ i_size_read(&dvnode->netfs.inode));
+ ret = netfs_writeback_single(mapping, wbc, &iter);
+ }
- /* we clean up only if the entire folio is being invalidated */
- if (offset == 0 && length == folio_size(folio))
- folio_detach_private(folio);
+ up_read(&dvnode->validate_lock);
+ return ret;
}
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index 0ab7752d1b75..fd3aa9f97ce6 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -10,6 +10,7 @@
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/iversion.h>
+#include <linux/folio_queue.h>
#include "internal.h"
#include "xdr_fs.h"
@@ -105,31 +106,66 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
}
/*
- * Get a new directory folio.
+ * Get a specific block, extending the directory storage to cover it as needed.
*/
-static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
+static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block)
{
- struct address_space *mapping = vnode->netfs.inode.i_mapping;
+ struct folio_queue *fq;
+ struct afs_vnode *dvnode = iter->dvnode;
struct folio *folio;
+ size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+ size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+ int ret;
+
+ if (dvnode->directory_size < blend) {
+ size_t cur_size = dvnode->directory_size;
+
+ ret = netfs_alloc_folioq_buffer(
+ NULL, &dvnode->directory, &cur_size, blend,
+ mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ dvnode->directory_size = cur_size;
+ if (ret < 0)
+ goto fail;
+ }
- folio = __filemap_get_folio(mapping, index,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- mapping->gfp_mask);
- if (!folio)
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- else if (folio && !folio_test_private(folio))
- folio_attach_private(folio, (void *)1);
+ fq = iter->fq;
+ if (!fq)
+ fq = dvnode->directory;
+
+ /* Search the folio queue for the folio containing the block... */
+ for (; fq; fq = fq->next) {
+ for (int s = iter->fq_slot; s < folioq_count(fq); s++) {
+ size_t fsize = folioq_folio_size(fq, s);
+
+ if (blend <= fpos + fsize) {
+ /* ... and then return the mapped block. */
+ folio = folioq_folio(fq, s);
+ if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+ goto fail;
+ iter->fq = fq;
+ iter->fq_slot = s;
+ iter->fpos = fpos;
+ return kmap_local_folio(folio, blpos - fpos);
+ }
+ fpos += fsize;
+ }
+ iter->fq_slot = 0;
+ }
- return folio;
+fail:
+ iter->fq = NULL;
+ iter->fq_slot = 0;
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+ return NULL;
}
/*
* Scan a directory block looking for a dirent of the right name.
*/
-static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
+static int afs_dir_scan_block(const union afs_xdr_dir_block *block, const struct qstr *name,
unsigned int blocknum)
{
- union afs_xdr_dirent *de;
+ const union afs_xdr_dirent *de;
u64 bitmap;
int d, len, n;
@@ -203,14 +239,13 @@ static void afs_edit_init_block(union afs_xdr_dir_block *meta,
* The caller must hold the inode locked.
*/
void afs_edit_dir_add(struct afs_vnode *vnode,
- struct qstr *name, struct afs_fid *new_fid,
+ const struct qstr *name, struct afs_fid *new_fid,
enum afs_edit_dir_reason why)
{
union afs_xdr_dir_block *meta, *block;
union afs_xdr_dirent *de;
- struct folio *folio0, *folio;
- unsigned int need_slots, nr_blocks, b;
- pgoff_t index;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ unsigned int nr_blocks, b, entry;
loff_t i_size;
int slot;
@@ -219,20 +254,17 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
i_size = i_size_read(&vnode->netfs.inode);
if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_bad_size);
return;
}
- folio0 = afs_dir_get_folio(vnode, 0);
- if (!folio0) {
- _leave(" [fgp]");
+ meta = afs_dir_get_block(&iter, 0);
+ if (!meta)
return;
- }
/* Work out how many slots we're going to need. */
- need_slots = afs_dir_calc_slots(name->len);
+ iter.nr_slots = afs_dir_calc_slots(name->len);
- meta = kmap_local_folio(folio0, 0);
if (i_size == 0)
goto new_directory;
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
@@ -244,22 +276,21 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
/* If the directory extended into a new folio, then we need to
* tack a new folio on the end.
*/
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
- goto error;
- if (index >= folio_nr_pages(folio0)) {
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
- goto error;
- } else {
- folio = folio0;
- }
+ goto error_too_many_blocks;
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+ /* Lower dir blocks have a counter in the header we can check. */
+ if (b < AFS_DIR_BLOCKS_WITH_CTR &&
+ meta->meta.alloc_ctrs[b] < iter.nr_slots)
+ continue;
+
+ block = afs_dir_get_block(&iter, b);
+ if (!block)
+ goto error;
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
+ goto already_invalidated;
_debug("block %u: %2u %3u %u",
b,
@@ -274,31 +305,23 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
}
- /* Only lower dir blocks have a counter in the header. */
- if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
- meta->meta.alloc_ctrs[b] >= need_slots) {
- /* We need to try and find one or more consecutive
- * slots to hold the entry.
- */
- slot = afs_find_contig_bits(block, need_slots);
- if (slot >= 0) {
- _debug("slot %u", slot);
- goto found_space;
- }
+ /* We need to try and find one or more consecutive slots to
+ * hold the entry.
+ */
+ slot = afs_find_contig_bits(block, iter.nr_slots);
+ if (slot >= 0) {
+ _debug("slot %u", slot);
+ goto found_space;
}
kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
}
/* There are no spare slots of sufficient size, yet the operation
* succeeded. Download the directory again.
*/
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_no_slots);
goto out_unmap;
new_directory:
@@ -306,8 +329,7 @@ new_directory:
i_size = AFS_DIR_BLOCK_SIZE;
afs_set_i_size(vnode, i_size);
slot = AFS_DIR_RESV_BLOCKS0;
- folio = folio0;
- block = kmap_local_folio(folio, 0);
+ block = afs_dir_get_block(&iter, 0);
nr_blocks = 1;
b = 0;
@@ -325,41 +347,39 @@ found_space:
de->u.name[name->len] = 0;
/* Adjust the bitmap. */
- afs_set_contig_bits(block, slot, need_slots);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ afs_set_contig_bits(block, slot, iter.nr_slots);
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
- meta->meta.alloc_ctrs[b] -= need_slots;
+ meta->meta.alloc_ctrs[b] -= iter.nr_slots;
+
+ /* Adjust the hash chain. */
+ entry = b * AFS_DIR_SLOTS_PER_BLOCK + slot;
+ iter.bucket = afs_dir_hash_name(name);
+ de->u.hash_next = meta->meta.hashtable[iter.bucket];
+ meta->meta.hashtable[iter.bucket] = htons(entry);
+ kunmap_local(block);
inode_inc_iversion_raw(&vnode->netfs.inode);
afs_stat_v(vnode, n_dir_cr);
_debug("Insert %s in %u[%u]", name->name, b, slot);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+
out_unmap:
kunmap_local(meta);
- folio_unlock(folio0);
- folio_put(folio0);
_leave("");
return;
-invalidated:
+already_invalidated:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
goto out_unmap;
+error_too_many_blocks:
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_too_many_blocks);
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
@@ -371,15 +391,16 @@ error:
* The caller must hold the inode locked.
*/
void afs_edit_dir_remove(struct afs_vnode *vnode,
- struct qstr *name, enum afs_edit_dir_reason why)
+ const struct qstr *name, enum afs_edit_dir_reason why)
{
- union afs_xdr_dir_block *meta, *block;
- union afs_xdr_dirent *de;
- struct folio *folio0, *folio;
- unsigned int need_slots, nr_blocks, b;
- pgoff_t index;
+ union afs_xdr_dir_block *meta, *block, *pblock;
+ union afs_xdr_dirent *de, *pde;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ struct afs_fid fid;
+ unsigned int b, slot, entry;
loff_t i_size;
- int slot;
+ __be16 next;
+ int found;
_enter(",,{%d,%s},", name->len, name->name);
@@ -387,81 +408,95 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
if (i_size < AFS_DIR_BLOCK_SIZE ||
i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_bad_size);
return;
}
- nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
- folio0 = afs_dir_get_folio(vnode, 0);
- if (!folio0) {
- _leave(" [fgp]");
+ if (!afs_dir_init_iter(&iter, name))
return;
- }
-
- /* Work out how many slots we're going to discard. */
- need_slots = afs_dir_calc_slots(name->len);
-
- meta = kmap_local_folio(folio0, 0);
-
- /* Find a block that has sufficient slots available. Each folio
- * contains two or more directory blocks.
- */
- for (b = 0; b < nr_blocks; b++) {
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
- if (index >= folio_nr_pages(folio0)) {
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
- goto error;
- } else {
- folio = folio0;
- }
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
-
- /* Abandon the edit if we got a callback break. */
- if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
-
- if (b > AFS_DIR_BLOCKS_WITH_CTR ||
- meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
- slot = afs_dir_scan_block(block, name, b);
- if (slot >= 0)
- goto found_dirent;
- }
+ meta = afs_dir_find_block(&iter, 0);
+ if (!meta)
+ return;
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ /* Find the entry in the blob. */
+ found = afs_dir_search_bucket(&iter, name, &fid);
+ if (found < 0) {
+ /* Didn't find the dirent to clobber. Re-download. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
+ 0, 0, 0, 0, name->name);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_wrong_name);
+ goto out_unmap;
}
- /* Didn't find the dirent to clobber. Download the directory again. */
- trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
- 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- goto out_unmap;
+ entry = found;
+ b = entry / AFS_DIR_SLOTS_PER_BLOCK;
+ slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
-found_dirent:
+ block = afs_dir_find_block(&iter, b);
+ if (!block)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto already_invalidated;
+
+ /* Check and clear the entry. */
de = &block->dirents[slot];
+ if (de->u.valid != 1)
+ goto error_unmap;
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
ntohl(de->u.vnode), ntohl(de->u.unique),
name->name);
- memset(de, 0, sizeof(*de) * need_slots);
-
/* Adjust the bitmap. */
- afs_clear_contig_bits(block, slot, need_slots);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ afs_clear_contig_bits(block, slot, iter.nr_slots);
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
- meta->meta.alloc_ctrs[b] += need_slots;
+ meta->meta.alloc_ctrs[b] += iter.nr_slots;
+
+ /* Clear the constituent entries. */
+ next = de->u.hash_next;
+ memset(de, 0, sizeof(*de) * iter.nr_slots);
+ kunmap_local(block);
+
+ /* Adjust the hash chain: if iter->prev_entry is 0, the hashtable head
+ * index is previous; otherwise it's slot number of the previous entry.
+ */
+ if (!iter.prev_entry) {
+ __be16 prev_next = meta->meta.hashtable[iter.bucket];
+
+ if (unlikely(prev_next != htons(entry))) {
+ pr_warn("%llx:%llx:%x: not head of chain b=%x p=%x,%x e=%x %*s",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ iter.bucket, iter.prev_entry, prev_next, entry,
+ name->len, name->name);
+ goto error;
+ }
+ meta->meta.hashtable[iter.bucket] = next;
+ } else {
+ unsigned int pb = iter.prev_entry / AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int ps = iter.prev_entry % AFS_DIR_SLOTS_PER_BLOCK;
+ __be16 prev_next;
+
+ pblock = afs_dir_find_block(&iter, pb);
+ if (!pblock)
+ goto error;
+ pde = &pblock->dirents[ps];
+ prev_next = pde->u.hash_next;
+ if (prev_next != htons(entry)) {
+ kunmap_local(pblock);
+ pr_warn("%llx:%llx:%x: not prev in chain b=%x p=%x,%x e=%x %*s",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ iter.bucket, iter.prev_entry, prev_next, entry,
+ name->len, name->name);
+ goto error;
+ }
+ pde->u.hash_next = next;
+ kunmap_local(pblock);
+ }
+
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
afs_stat_v(vnode, n_dir_rm);
@@ -469,25 +504,145 @@ found_dirent:
out_unmap:
kunmap_local(meta);
- folio_unlock(folio0);
- folio_put(folio0);
_leave("");
return;
-invalidated:
+already_invalidated:
+ kunmap_local(block);
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
goto out_unmap;
+error_unmap:
+ kunmap_local(block);
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
+
+/*
+ * Edit an entry in a directory to update the vnode it refers to. This is also
+ * used to update the ".." entry in a directory.
+ */
+void afs_edit_dir_update(struct afs_vnode *vnode, const struct qstr *name,
+ struct afs_vnode *new_dvnode, enum afs_edit_dir_reason why)
+{
+ union afs_xdr_dir_block *block;
+ union afs_xdr_dirent *de;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ unsigned int nr_blocks, b;
+ loff_t i_size;
+ int slot;
+
+ _enter("");
+
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size < AFS_DIR_BLOCK_SIZE) {
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size);
+ return;
+ }
+
+ nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+ /* Find a block that has sufficient slots available. Each folio
+ * contains two or more directory blocks.
+ */
+ for (b = 0; b < nr_blocks; b++) {
+ block = afs_dir_get_block(&iter, b);
+ if (!block)
+ goto error;
+
+ /* Abandon the edit if we got a callback break. */
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto already_invalidated;
+
+ slot = afs_dir_scan_block(block, name, b);
+ if (slot >= 0)
+ goto found_dirent;
+
+ kunmap_local(block);
+ }
+
+ /* Didn't find the dirent to clobber. Download the directory again. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
+ 0, 0, 0, 0, name->name);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_no_dd);
+ goto out;
+
+found_dirent:
+ de = &block->dirents[slot];
+ de->u.vnode = htonl(new_dvnode->fid.vnode);
+ de->u.unique = htonl(new_dvnode->fid.unique);
+
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_dd, b, slot,
+ ntohl(de->u.vnode), ntohl(de->u.unique), name->name);
+
+ kunmap_local(block);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+ inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+
+out:
+ _leave("");
+ return;
+
+already_invalidated:
+ kunmap_local(block);
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
+ 0, 0, 0, 0, name->name);
+ goto out;
+
+error:
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
+ 0, 0, 0, 0, name->name);
+ goto out;
+}
+
+/*
+ * Initialise a new directory. We need to fill in the "." and ".." entries.
+ */
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_dvnode)
+{
+ union afs_xdr_dir_block *meta;
+ struct afs_dir_iter iter = { .dvnode = dvnode };
+ union afs_xdr_dirent *de;
+ unsigned int slot = AFS_DIR_RESV_BLOCKS0;
+ loff_t i_size;
+
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size != AFS_DIR_BLOCK_SIZE) {
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_add_bad_size);
+ return;
+ }
+
+ meta = afs_dir_get_block(&iter, 0);
+ if (!meta)
+ return;
+
+ afs_edit_init_block(meta, meta, 0);
+
+ de = &meta->dirents[slot];
+ de->u.valid = 1;
+ de->u.vnode = htonl(dvnode->fid.vnode);
+ de->u.unique = htonl(dvnode->fid.unique);
+ memcpy(de->u.name, ".", 2);
+ trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+ dvnode->fid.vnode, dvnode->fid.unique, ".");
+ slot++;
+
+ de = &meta->dirents[slot];
+ de->u.valid = 1;
+ de->u.vnode = htonl(parent_dvnode->fid.vnode);
+ de->u.unique = htonl(parent_dvnode->fid.unique);
+ memcpy(de->u.name, "..", 3);
+ trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+ parent_dvnode->fid.vnode, parent_dvnode->fid.unique, "..");
+
+ afs_set_contig_bits(meta, AFS_DIR_RESV_BLOCKS0, 2);
+ meta->meta.alloc_ctrs[0] -= 2;
+ kunmap_local(meta);
+
+ netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
+ set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+}
diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c
new file mode 100644
index 000000000000..d2516e55b5ed
--- /dev/null
+++ b/fs/afs/dir_search.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Search a directory's hash table.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/draft-keiser-afs3-directory-object-00
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+
+/*
+ * Calculate the name hash.
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name)
+{
+ const unsigned char *p = name->name;
+ unsigned int hash = 0, i;
+ int bucket;
+
+ for (i = 0; i < name->len; i++)
+ hash = (hash * 173) + p[i];
+ bucket = hash & (AFS_DIR_HASHTBL_SIZE - 1);
+ if (hash > INT_MAX) {
+ bucket = AFS_DIR_HASHTBL_SIZE - bucket;
+ bucket &= (AFS_DIR_HASHTBL_SIZE - 1);
+ }
+ return bucket;
+}
+
+/*
+ * Reset a directory iterator.
+ */
+static bool afs_dir_reset_iter(struct afs_dir_iter *iter)
+{
+ unsigned long long i_size = i_size_read(&iter->dvnode->netfs.inode);
+ unsigned int nblocks;
+
+ /* Work out the maximum number of steps we can take. */
+ nblocks = umin(i_size / AFS_DIR_BLOCK_SIZE, AFS_DIR_MAX_BLOCKS);
+ if (!nblocks)
+ return false;
+ iter->loop_check = nblocks * (AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS);
+ iter->prev_entry = 0; /* Hash head is previous */
+ return true;
+}
+
+/*
+ * Initialise a directory iterator for looking up a name.
+ */
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name)
+{
+ iter->nr_slots = afs_dir_calc_slots(name->len);
+ iter->bucket = afs_dir_hash_name(name);
+ return afs_dir_reset_iter(iter);
+}
+
+/*
+ * Get a specific block.
+ */
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block)
+{
+ struct folio_queue *fq = iter->fq;
+ struct afs_vnode *dvnode = iter->dvnode;
+ struct folio *folio;
+ size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+ size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+ int slot = iter->fq_slot;
+
+ _enter("%zx,%d", block, slot);
+
+ if (iter->block) {
+ kunmap_local(iter->block);
+ iter->block = NULL;
+ }
+
+ if (dvnode->directory_size < blend)
+ goto fail;
+
+ if (!fq || blpos < fpos) {
+ fq = dvnode->directory;
+ slot = 0;
+ fpos = 0;
+ }
+
+ /* Search the folio queue for the folio containing the block... */
+ for (; fq; fq = fq->next) {
+ for (; slot < folioq_count(fq); slot++) {
+ size_t fsize = folioq_folio_size(fq, slot);
+
+ if (blend <= fpos + fsize) {
+ /* ... and then return the mapped block. */
+ folio = folioq_folio(fq, slot);
+ if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+ goto fail;
+ iter->fq = fq;
+ iter->fq_slot = slot;
+ iter->fpos = fpos;
+ iter->block = kmap_local_folio(folio, blpos - fpos);
+ return iter->block;
+ }
+ fpos += fsize;
+ }
+ slot = 0;
+ }
+
+fail:
+ iter->fq = NULL;
+ iter->fq_slot = 0;
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+ return NULL;
+}
+
+/*
+ * Search through a directory bucket.
+ */
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+ struct afs_fid *_fid)
+{
+ const union afs_xdr_dir_block *meta;
+ unsigned int entry;
+ int ret = -ESTALE;
+
+ meta = afs_dir_find_block(iter, 0);
+ if (!meta)
+ return -ESTALE;
+
+ entry = ntohs(meta->meta.hashtable[iter->bucket & (AFS_DIR_HASHTBL_SIZE - 1)]);
+ _enter("%x,%x", iter->bucket, entry);
+
+ while (entry) {
+ const union afs_xdr_dir_block *block;
+ const union afs_xdr_dirent *dire;
+ unsigned int blnum = entry / AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int resv = (blnum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+
+ _debug("search %x", entry);
+
+ if (slot < resv) {
+ kdebug("slot out of range h=%x rs=%2x sl=%2x-%2x",
+ iter->bucket, resv, slot, slot + iter->nr_slots - 1);
+ goto bad;
+ }
+
+ block = afs_dir_find_block(iter, blnum);
+ if (!block)
+ goto bad;
+ dire = &block->dirents[slot];
+
+ if (slot + iter->nr_slots <= AFS_DIR_SLOTS_PER_BLOCK &&
+ memcmp(dire->u.name, name->name, name->len) == 0 &&
+ dire->u.name[name->len] == '\0') {
+ _fid->vnode = ntohl(dire->u.vnode);
+ _fid->unique = ntohl(dire->u.unique);
+ ret = entry;
+ goto found;
+ }
+
+ iter->prev_entry = entry;
+ entry = ntohs(dire->u.hash_next);
+ if (!--iter->loop_check) {
+ kdebug("dir chain loop h=%x", iter->bucket);
+ goto bad;
+ }
+ }
+
+ ret = -ENOENT;
+found:
+ if (iter->block) {
+ kunmap_local(iter->block);
+ iter->block = NULL;
+ }
+
+bad:
+ if (ret == -ESTALE)
+ afs_invalidate_dir(iter->dvnode, afs_dir_invalid_iter_stale);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Search the appropriate hash chain in the contents of an AFS directory.
+ */
+int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
+ struct afs_fid *_fid, afs_dataversion_t *_dir_version)
+{
+ struct afs_dir_iter iter = { .dvnode = dvnode, };
+ int ret, retry_limit = 3;
+
+ _enter("{%lu},,,", dvnode->netfs.inode.i_ino);
+
+ if (!afs_dir_init_iter(&iter, name))
+ return -ENOENT;
+ do {
+ if (--retry_limit < 0) {
+ pr_warn("afs_read_dir(): Too many retries\n");
+ ret = -ESTALE;
+ break;
+ }
+ ret = afs_read_dir(dvnode, NULL);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ break;
+ if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
+ ret = -ESTALE;
+ break;
+ }
+ continue;
+ }
+ *_dir_version = inode_peek_iversion_raw(&dvnode->netfs.inode);
+
+ ret = afs_dir_search_bucket(&iter, name, _fid);
+ up_read(&dvnode->validate_lock);
+ if (ret == -ESTALE)
+ afs_dir_reset_iter(&iter);
+ } while (ret == -ESTALE);
+
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index bb5807e87fa4..014495d4b868 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -69,6 +69,12 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
if (IS_ERR(op))
return PTR_ERR(op);
+ op->more_files = kvcalloc(2, sizeof(struct afs_vnode_param), GFP_KERNEL);
+ if (!op->more_files) {
+ afs_put_operation(op);
+ return -ENOMEM;
+ }
+
afs_op_set_vnode(op, 0, dvnode);
afs_op_set_vnode(op, 1, dvnode);
op->file[0].dv_delta = 1;
@@ -77,6 +83,11 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
op->file[1].modification = true;
op->file[0].update_ctime = true;
op->file[1].update_ctime = true;
+ op->more_files[0].vnode = AFS_FS_I(d_inode(old));
+ op->more_files[0].speculative = true;
+ op->more_files[1].vnode = AFS_FS_I(d_inode(new));
+ op->more_files[1].speculative = true;
+ op->nr_files = 4;
op->dentry = old;
op->dentry_2 = new;
@@ -113,16 +124,14 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
sdentry = NULL;
do {
- int slen;
-
dput(sdentry);
sillycounter++;
/* Create a silly name. Note that the ".__afs" prefix is
* understood by the salvager and must not be changed.
*/
- slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+ sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
/* N.B. Better to return EBUSY here ... it could be dangerous
* to delete the file while it's in use.
@@ -218,7 +227,7 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
/* If there was a conflict with a third party, check the status of the
* unlinked vnode.
*/
- if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ if (op->cumul_error.error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
op->file[1].update_ctime = false;
op->fetch_status.which = 1;
op->ops = &afs_fetch_status_operation;
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index d7d9402ff718..aa56e8951e03 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -10,16 +10,19 @@
#include <linux/dns_resolver.h>
#include "internal.h"
-static atomic_t afs_autocell_ino;
+#define AFS_MIN_DYNROOT_CELL_INO 4 /* Allow for ., .., @cell, .@cell */
+#define AFS_MAX_DYNROOT_CELL_INO ((unsigned int)INT_MAX)
+
+static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino);
/*
* iget5() comparator for inode created by autocell operations
- *
- * These pseudo inodes don't match anything.
*/
static int afs_iget5_pseudo_test(struct inode *inode, void *opaque)
{
- return 0;
+ struct afs_fid *fid = opaque;
+
+ return inode->i_ino == fid->vnode;
}
/*
@@ -39,28 +42,16 @@ static int afs_iget5_pseudo_set(struct inode *inode, void *opaque)
}
/*
- * Create an inode for a dynamic root directory or an autocell dynamic
- * automount dir.
+ * Create an inode for an autocell dynamic automount dir.
*/
-struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
+static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino)
{
- struct afs_super_info *as = AFS_FS_S(sb);
struct afs_vnode *vnode;
struct inode *inode;
- struct afs_fid fid = {};
+ struct afs_fid fid = { .vnode = ino, .unique = 1, };
_enter("");
- if (as->volume)
- fid.vid = as->volume->vid;
- if (root) {
- fid.vnode = 1;
- fid.unique = 1;
- } else {
- fid.vnode = atomic_inc_return(&afs_autocell_ino);
- fid.unique = 0;
- }
-
inode = iget5_locked(sb, fid.vnode,
afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
if (!inode) {
@@ -73,149 +64,76 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
vnode = AFS_FS_I(inode);
- /* there shouldn't be an existing inode */
- BUG_ON(!(inode->i_state & I_NEW));
-
- netfs_inode_init(&vnode->netfs, NULL);
- inode->i_size = 0;
- inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
- if (root) {
- inode->i_op = &afs_dynroot_inode_operations;
- inode->i_fop = &simple_dir_operations;
- } else {
- inode->i_op = &afs_autocell_inode_operations;
- }
- set_nlink(inode, 2);
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
- inode->i_blocks = 0;
- inode->i_generation = 0;
-
- set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
- if (!root) {
+ if (inode_state_read_once(inode) & I_NEW) {
+ netfs_inode_init(&vnode->netfs, NULL, false);
+ simple_inode_init_ts(inode);
+ set_nlink(inode, 2);
+ inode->i_size = 0;
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_autocell_inode_operations;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_generation = 0;
+ inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
+
+ set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
- inode->i_flags |= S_AUTOMOUNT;
- }
- inode->i_flags |= S_NOATIME;
- unlock_new_inode(inode);
+ unlock_new_inode(inode);
+ }
_leave(" = %p", inode);
return inode;
}
/*
- * Probe to see if a cell may exist. This prevents positive dentries from
- * being created unnecessarily.
+ * Try to automount the mountpoint with pseudo directory, if the autocell
+ * option is set.
*/
-static int afs_probe_cell_name(struct dentry *dentry)
+static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
- struct afs_cell *cell;
+ struct afs_cell *cell = NULL;
struct afs_net *net = afs_d2net(dentry);
+ struct inode *inode = NULL;
const char *name = dentry->d_name.name;
size_t len = dentry->d_name.len;
- int ret;
+ bool dotted = false;
+ int ret = -ENOENT;
/* Names prefixed with a dot are R/W mounts. */
if (name[0] == '.') {
- if (len == 1)
- return -EINVAL;
name++;
len--;
+ dotted = true;
}
- cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe);
- if (!IS_ERR(cell)) {
- afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe);
- return 0;
+ cell = afs_lookup_cell(net, name, len, NULL,
+ AFS_LOOKUP_CELL_DYNROOT,
+ afs_cell_trace_use_lookup_dynroot);
+ if (IS_ERR(cell)) {
+ ret = PTR_ERR(cell);
+ goto out_no_cell;
}
- ret = dns_query(net->net, "afsdb", name, len, "srv=1",
- NULL, NULL, false);
- if (ret == -ENODATA)
- ret = -EDESTADDRREQ;
- return ret;
-}
-
-/*
- * Try to auto mount the mountpoint with pseudo directory, if the autocell
- * operation is setted.
- */
-struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
-{
- struct afs_vnode *vnode = AFS_FS_I(dir);
- struct inode *inode;
- int ret = -ENOENT;
-
- _enter("%p{%pd}, {%llx:%llu}",
- dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
-
- if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
- goto out;
-
- ret = afs_probe_cell_name(dentry);
- if (ret < 0)
- goto out;
-
- inode = afs_iget_pseudo_dir(dir->i_sb, false);
+ inode = afs_iget_pseudo_dir(dir->i_sb, cell->dynroot_ino * 2 + dotted);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto out;
}
- _leave("= %p", inode);
- return inode;
+ dentry->d_fsdata = cell;
+ return d_splice_alias(inode, dentry);
out:
- _leave("= %d", ret);
+ afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_dynroot);
+out_no_cell:
+ if (!inode)
+ return d_splice_alias(inode, dentry);
return ret == -ENOENT ? NULL : ERR_PTR(ret);
}
/*
- * Look up @cell in a dynroot directory. This is a substitution for the
- * local cell name for the net namespace.
- */
-static struct dentry *afs_lookup_atcell(struct dentry *dentry)
-{
- struct afs_cell *cell;
- struct afs_net *net = afs_d2net(dentry);
- struct dentry *ret;
- char *name;
- int len;
-
- if (!net->ws_cell)
- return ERR_PTR(-ENOENT);
-
- ret = ERR_PTR(-ENOMEM);
- name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL);
- if (!name)
- goto out_p;
-
- down_read(&net->cells_lock);
- cell = net->ws_cell;
- if (cell) {
- len = cell->name_len;
- memcpy(name, cell->name, len + 1);
- }
- up_read(&net->cells_lock);
-
- ret = ERR_PTR(-ENOENT);
- if (!cell)
- goto out_n;
-
- ret = lookup_one_len(name, dentry->d_parent, len);
-
- /* We don't want to d_add() the @cell dentry here as we don't want to
- * the cached dentry to hide changes to the local cell name.
- */
-
-out_n:
- kfree(name);
-out_p:
- return ret;
-}
-
-/*
* Look up an entry in a dynroot directory.
*/
static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
@@ -223,8 +141,6 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
{
_enter("%pd", dentry);
- ASSERTCMP(d_inode(dentry), ==, NULL);
-
if (flags & LOOKUP_CREATE)
return ERR_PTR(-EOPNOTSUPP);
@@ -235,160 +151,256 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
if (dentry->d_name.len == 5 &&
memcmp(dentry->d_name.name, "@cell", 5) == 0)
- return afs_lookup_atcell(dentry);
+ return afs_lookup_atcell(dir, dentry, 2);
+
+ if (dentry->d_name.len == 6 &&
+ memcmp(dentry->d_name.name, ".@cell", 6) == 0)
+ return afs_lookup_atcell(dir, dentry, 3);
- return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry);
+ return afs_dynroot_lookup_cell(dir, dentry, flags);
}
const struct inode_operations afs_dynroot_inode_operations = {
.lookup = afs_dynroot_lookup,
};
-/*
- * Dirs in the dynamic root don't need revalidation.
- */
-static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
+static void afs_dynroot_d_release(struct dentry *dentry)
{
- return 1;
+ struct afs_cell *cell = dentry->d_fsdata;
+
+ afs_unuse_cell(cell, afs_cell_trace_unuse_dynroot_mntpt);
}
/*
- * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
- * sleep)
- * - called from dput() when d_count is going to 0.
- * - return 1 to request dentry be unhashed, 0 otherwise
+ * Keep @cell symlink dentries around, but only keep cell autodirs when they're
+ * being used.
*/
-static int afs_dynroot_d_delete(const struct dentry *dentry)
+static int afs_dynroot_delete_dentry(const struct dentry *dentry)
{
- return d_really_is_positive(dentry);
+ const struct qstr *name = &dentry->d_name;
+
+ if (name->len == 5 && memcmp(name->name, "@cell", 5) == 0)
+ return 0;
+ if (name->len == 6 && memcmp(name->name, ".@cell", 6) == 0)
+ return 0;
+ return 1;
}
const struct dentry_operations afs_dynroot_dentry_operations = {
- .d_revalidate = afs_dynroot_d_revalidate,
- .d_delete = afs_dynroot_d_delete,
- .d_release = afs_d_release,
+ .d_delete = afs_dynroot_delete_dentry,
+ .d_release = afs_dynroot_d_release,
.d_automount = afs_d_automount,
};
+static void afs_atcell_delayed_put_cell(void *arg)
+{
+ struct afs_cell *cell = arg;
+
+ afs_put_cell(cell, afs_cell_trace_put_atcell);
+}
+
/*
- * Create a manually added cell mount directory.
- * - The caller must hold net->proc_cells_lock
+ * Read @cell or .@cell symlinks.
*/
-int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell)
+static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
{
- struct super_block *sb = net->dynroot_sb;
- struct dentry *root, *subdir;
- int ret;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_cell *cell;
+ struct afs_net *net = afs_i2net(inode);
+ const char *name;
+ bool dotted = vnode->fid.vnode == 3;
- if (!sb || atomic_read(&sb->s_active) == 0)
- return 0;
+ if (!rcu_access_pointer(net->ws_cell))
+ return ERR_PTR(-ENOENT);
- /* Let the ->lookup op do the creation */
- root = sb->s_root;
- inode_lock(root->d_inode);
- subdir = lookup_one_len(cell->name, root, cell->name_len);
- if (IS_ERR(subdir)) {
- ret = PTR_ERR(subdir);
- goto unlock;
+ if (!dentry) {
+ /* We're in RCU-pathwalk. */
+ cell = rcu_dereference(net->ws_cell);
+ if (dotted)
+ name = cell->name - 1;
+ else
+ name = cell->name;
+ /* Shouldn't need to set a delayed call. */
+ return name;
}
- /* Note that we're retaining an extra ref on the dentry */
- subdir->d_fsdata = (void *)1UL;
- ret = 0;
-unlock:
- inode_unlock(root->d_inode);
- return ret;
+ down_read(&net->cells_lock);
+
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
+ if (dotted)
+ name = cell->name - 1;
+ else
+ name = cell->name;
+ afs_get_cell(cell, afs_cell_trace_get_atcell);
+ set_delayed_call(done, afs_atcell_delayed_put_cell, cell);
+
+ up_read(&net->cells_lock);
+ return name;
}
+static const struct inode_operations afs_atcell_inode_operations = {
+ .get_link = afs_atcell_get_link,
+};
+
/*
- * Remove a manually added cell mount directory.
- * - The caller must hold net->proc_cells_lock
+ * Create an inode for the @cell or .@cell symlinks.
*/
-void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell)
+static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino)
{
- struct super_block *sb = net->dynroot_sb;
- struct dentry *root, *subdir;
+ struct afs_vnode *vnode;
+ struct inode *inode;
+ struct afs_fid fid = { .vnode = ino, .unique = 1, };
- if (!sb || atomic_read(&sb->s_active) == 0)
- return;
+ inode = iget5_locked(dir->i_sb, fid.vnode,
+ afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
- root = sb->s_root;
- inode_lock(root->d_inode);
+ vnode = AFS_FS_I(inode);
- /* Don't want to trigger a lookup call, which will re-add the cell */
- subdir = try_lookup_one_len(cell->name, root, cell->name_len);
- if (IS_ERR_OR_NULL(subdir)) {
- _debug("lookup %ld", PTR_ERR(subdir));
- goto no_dentry;
+ if (inode_state_read_once(inode) & I_NEW) {
+ netfs_inode_init(&vnode->netfs, NULL, false);
+ simple_inode_init_ts(inode);
+ set_nlink(inode, 1);
+ inode->i_size = 0;
+ inode->i_mode = S_IFLNK | 0555;
+ inode->i_op = &afs_atcell_inode_operations;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_generation = 0;
+ inode->i_flags |= S_NOATIME;
+
+ unlock_new_inode(inode);
}
+ return d_splice_alias(inode, dentry);
+}
+
+/*
+ * Transcribe the cell database into readdir content under the RCU read lock.
+ * Each cell produces two entries, one prefixed with a dot and one not.
+ */
+static int afs_dynroot_readdir_cells(struct afs_net *net, struct dir_context *ctx)
+{
+ const struct afs_cell *cell;
+ loff_t newpos;
+
+ _enter("%llu", ctx->pos);
+
+ for (;;) {
+ unsigned int ix = ctx->pos >> 1;
+
+ cell = idr_get_next(&net->cells_dyn_ino, &ix);
+ if (!cell)
+ return 0;
+ if (READ_ONCE(cell->state) == AFS_CELL_REMOVING ||
+ READ_ONCE(cell->state) == AFS_CELL_DEAD) {
+ ctx->pos += 2;
+ ctx->pos &= ~1;
+ continue;
+ }
- _debug("rmdir %pd %u", subdir, d_count(subdir));
+ newpos = ix << 1;
+ if (newpos > ctx->pos)
+ ctx->pos = newpos;
- if (subdir->d_fsdata) {
- _debug("unpin %u", d_count(subdir));
- subdir->d_fsdata = NULL;
- dput(subdir);
+ _debug("pos %llu -> cell %u", ctx->pos, cell->dynroot_ino);
+
+ if ((ctx->pos & 1) == 0) {
+ if (!dir_emit(ctx, cell->name, cell->name_len,
+ cell->dynroot_ino, DT_DIR))
+ return 0;
+ ctx->pos++;
+ }
+ if ((ctx->pos & 1) == 1) {
+ if (!dir_emit(ctx, cell->name - 1, cell->name_len + 1,
+ cell->dynroot_ino + 1, DT_DIR))
+ return 0;
+ ctx->pos++;
+ }
}
- dput(subdir);
-no_dentry:
- inode_unlock(root->d_inode);
- _leave("");
+ return 0;
}
/*
- * Populate a newly created dynamic root with cell names.
+ * Read the AFS dynamic root directory. This produces a list of cellnames,
+ * dotted and undotted, along with @cell and .@cell links if configured.
*/
-int afs_dynroot_populate(struct super_block *sb)
+static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx)
{
- struct afs_cell *cell;
- struct afs_net *net = afs_sb2net(sb);
- int ret;
+ struct afs_net *net = afs_d2net(file->f_path.dentry);
+ int ret = 0;
- mutex_lock(&net->proc_cells_lock);
+ if (!dir_emit_dots(file, ctx))
+ return 0;
- net->dynroot_sb = sb;
- hlist_for_each_entry(cell, &net->proc_cells, proc_link) {
- ret = afs_dynroot_mkdir(net, cell);
- if (ret < 0)
- goto error;
+ if (ctx->pos == 2) {
+ if (rcu_access_pointer(net->ws_cell) &&
+ !dir_emit(ctx, "@cell", 5, 2, DT_LNK))
+ return 0;
+ ctx->pos = 3;
+ }
+ if (ctx->pos == 3) {
+ if (rcu_access_pointer(net->ws_cell) &&
+ !dir_emit(ctx, ".@cell", 6, 3, DT_LNK))
+ return 0;
+ ctx->pos = 4;
}
- ret = 0;
-out:
- mutex_unlock(&net->proc_cells_lock);
+ if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) {
+ down_read(&net->cells_lock);
+ ret = afs_dynroot_readdir_cells(net, ctx);
+ up_read(&net->cells_lock);
+ }
return ret;
-
-error:
- net->dynroot_sb = NULL;
- goto out;
}
+static const struct file_operations afs_dynroot_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = afs_dynroot_readdir,
+ .fsync = noop_fsync,
+};
+
/*
- * When a dynamic root that's in the process of being destroyed, depopulate it
- * of pinned directories.
+ * Create an inode for a dynamic root directory.
*/
-void afs_dynroot_depopulate(struct super_block *sb)
+struct inode *afs_dynroot_iget_root(struct super_block *sb)
{
- struct afs_net *net = afs_sb2net(sb);
- struct dentry *root = sb->s_root, *subdir, *tmp;
-
- /* Prevent more subdirs from being created */
- mutex_lock(&net->proc_cells_lock);
- if (net->dynroot_sb == sb)
- net->dynroot_sb = NULL;
- mutex_unlock(&net->proc_cells_lock);
-
- if (root) {
- inode_lock(root->d_inode);
-
- /* Remove all the pins for dirs created for manually added cells */
- list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) {
- if (subdir->d_fsdata) {
- subdir->d_fsdata = NULL;
- dput(subdir);
- }
- }
+ struct afs_super_info *as = AFS_FS_S(sb);
+ struct afs_vnode *vnode;
+ struct inode *inode;
+ struct afs_fid fid = { .vid = 0, .vnode = 1, .unique = 1,};
+
+ if (as->volume)
+ fid.vid = as->volume->vid;
+
+ inode = iget5_locked(sb, fid.vnode,
+ afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ vnode = AFS_FS_I(inode);
- inode_unlock(root->d_inode);
+ /* there shouldn't be an existing inode */
+ if (inode_state_read_once(inode) & I_NEW) {
+ netfs_inode_init(&vnode->netfs, NULL, false);
+ simple_inode_init_ts(inode);
+ set_nlink(inode, 2);
+ inode->i_size = 0;
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_dynroot_inode_operations;
+ inode->i_fop = &afs_dynroot_file_operations;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_generation = 0;
+ inode->i_flags |= S_NOATIME;
+
+ set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+ unlock_new_inode(inode);
}
+ _leave(" = %p", inode);
+ return inode;
}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 68d6d5dc608d..f66a92294284 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -16,15 +16,15 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "internal.h"
-static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
-static int afs_symlink_read_folio(struct file *file, struct folio *folio);
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
- size_t length);
-static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags);
+static int afs_file_mmap_prepare(struct vm_area_desc *desc);
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
static void afs_vm_open(struct vm_area_struct *area);
static void afs_vm_close(struct vm_area_struct *area);
static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);
@@ -34,9 +34,9 @@ const struct file_operations afs_file_operations = {
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = afs_file_read_iter,
- .write_iter = afs_file_write,
- .mmap = afs_file_mmap,
- .splice_read = generic_file_splice_read,
+ .write_iter = netfs_file_write_iter,
+ .mmap_prepare = afs_file_mmap_prepare,
+ .splice_read = afs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = afs_fsync,
.lock = afs_lock,
@@ -50,23 +50,14 @@ const struct inode_operations afs_file_inode_operations = {
};
const struct address_space_operations afs_file_aops = {
+ .direct_IO = noop_direct_IO,
.read_folio = netfs_read_folio,
.readahead = netfs_readahead,
- .dirty_folio = afs_dirty_folio,
- .launder_folio = afs_launder_folio,
- .release_folio = afs_release_folio,
- .invalidate_folio = afs_invalidate_folio,
- .write_begin = afs_write_begin,
- .write_end = afs_write_end,
- .writepages = afs_writepages,
- .migrate_folio = filemap_migrate_folio,
-};
-
-const struct address_space_operations afs_symlink_aops = {
- .read_folio = afs_symlink_read_folio,
- .release_folio = afs_release_folio,
- .invalidate_folio = afs_invalidate_folio,
+ .dirty_folio = netfs_dirty_folio,
+ .release_folio = netfs_release_folio,
+ .invalidate_folio = netfs_invalidate_folio,
.migrate_folio = filemap_migrate_folio,
+ .writepages = afs_writepages,
};
static const struct vm_operations_struct afs_vm_ops = {
@@ -209,50 +200,12 @@ int afs_release(struct inode *inode, struct file *file)
return ret;
}
-/*
- * Allocate a new read record.
- */
-struct afs_read *afs_alloc_read(gfp_t gfp)
-{
- struct afs_read *req;
-
- req = kzalloc(sizeof(struct afs_read), gfp);
- if (req)
- refcount_set(&req->usage, 1);
-
- return req;
-}
-
-/*
- * Dispose of a ref to a read record.
- */
-void afs_put_read(struct afs_read *req)
-{
- if (refcount_dec_and_test(&req->usage)) {
- if (req->cleanup)
- req->cleanup(req);
- key_put(req->key);
- kfree(req);
- }
-}
-
static void afs_fetch_data_notify(struct afs_operation *op)
{
- struct afs_read *req = op->fetch.req;
- struct netfs_io_subrequest *subreq = req->subreq;
- int error = op->error;
-
- if (error == -ECONNABORTED)
- error = afs_abort_to_error(op->ac.abort_code);
- req->error = error;
-
- if (subreq) {
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
- req->subreq = NULL;
- } else if (req->done) {
- req->done(req);
- }
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
+
+ subreq->error = afs_op_error(op);
+ netfs_read_subreq_terminated(subreq);
}
static void afs_fetch_data_success(struct afs_operation *op)
@@ -262,117 +215,199 @@ static void afs_fetch_data_success(struct afs_operation *op)
_enter("op=%08x", op->debug_id);
afs_vnode_commit_status(op, &op->file[0]);
afs_stat_v(vnode, n_fetches);
- atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
+ atomic_long_add(op->fetch.subreq->transferred, &op->net->n_fetch_bytes);
afs_fetch_data_notify(op);
}
-static void afs_fetch_data_put(struct afs_operation *op)
+static void afs_fetch_data_aborted(struct afs_operation *op)
{
- op->fetch.req->error = op->error;
- afs_put_read(op->fetch.req);
+ afs_check_for_remote_deletion(op);
+ afs_fetch_data_notify(op);
}
-static const struct afs_operation_ops afs_fetch_data_operation = {
+const struct afs_operation_ops afs_fetch_data_operation = {
.issue_afs_rpc = afs_fs_fetch_data,
.issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success,
- .aborted = afs_check_for_remote_deletion,
+ .aborted = afs_fetch_data_aborted,
.failed = afs_fetch_data_notify,
- .put = afs_fetch_data_put,
};
+static void afs_issue_read_call(struct afs_operation *op)
+{
+ op->call_responded = false;
+ op->call_error = 0;
+ op->call_abort_code = 0;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags))
+ yfs_fs_fetch_data(op);
+ else
+ afs_fs_fetch_data(op);
+}
+
+static void afs_end_read(struct afs_operation *op)
+{
+ if (op->call_responded && op->server)
+ set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+ if (!afs_op_error(op))
+ afs_fetch_data_success(op);
+ else if (op->cumul_error.aborted)
+ afs_fetch_data_aborted(op);
+ else
+ afs_fetch_data_notify(op);
+
+ afs_end_vnode_operation(op);
+ afs_put_operation(op);
+}
+
+/*
+ * Perform I/O processing on an asynchronous call. The work item carries a ref
+ * to the call struct that we either need to release or to pass on.
+ */
+static void afs_read_receive(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ enum afs_call_state state;
+
+ _enter("");
+
+ state = READ_ONCE(call->state);
+ if (state == AFS_CALL_COMPLETE)
+ return;
+ trace_afs_read_recv(op, call);
+
+ while (state < AFS_CALL_COMPLETE && READ_ONCE(call->need_attention)) {
+ WRITE_ONCE(call->need_attention, false);
+ afs_deliver_to_call(call);
+ state = READ_ONCE(call->state);
+ }
+
+ if (state < AFS_CALL_COMPLETE) {
+ netfs_read_subreq_progress(op->fetch.subreq);
+ if (rxrpc_kernel_check_life(call->net->socket, call->rxcall))
+ return;
+ /* rxrpc terminated the call. */
+ afs_set_call_complete(call, call->error, call->abort_code);
+ }
+
+ op->call_abort_code = call->abort_code;
+ op->call_error = call->error;
+ op->call_responded = call->responded;
+ op->call = NULL;
+ call->op = NULL;
+ afs_put_call(call);
+
+ /* If the call failed, then we need to crank the server rotation
+ * handle and try the next.
+ */
+ if (afs_select_fileserver(op)) {
+ afs_issue_read_call(op);
+ return;
+ }
+
+ afs_end_read(op);
+}
+
+void afs_fetch_data_async_rx(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
+ afs_read_receive(call);
+ afs_put_call(call);
+}
+
+void afs_fetch_data_immediate_cancel(struct afs_call *call)
+{
+ if (call->async) {
+ afs_get_call(call, afs_call_trace_wake);
+ if (!queue_work(afs_async_calls, &call->async_work))
+ afs_deferred_put_call(call);
+ flush_work(&call->async_work);
+ }
+}
+
/*
* Fetch file data from the volume.
*/
-int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
+static void afs_issue_read(struct netfs_io_subrequest *subreq)
{
struct afs_operation *op;
+ struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+ struct key *key = subreq->rreq->netfs_priv;
_enter("%s{%llx:%llu.%u},%x,,,",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- key_serial(req->key));
+ key_serial(key));
- op = afs_alloc_operation(req->key, vnode->volume);
+ op = afs_alloc_operation(key, vnode->volume);
if (IS_ERR(op)) {
- if (req->subreq)
- netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
- return PTR_ERR(op);
+ subreq->error = PTR_ERR(op);
+ netfs_read_subreq_terminated(subreq);
+ return;
}
afs_op_set_vnode(op, 0, vnode);
- op->fetch.req = afs_get_read(req);
+ op->fetch.subreq = subreq;
op->ops = &afs_fetch_data_operation;
- return afs_do_sync_operation(op);
-}
-
-static void afs_issue_read(struct netfs_io_subrequest *subreq)
-{
- struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
- struct afs_read *fsreq;
- fsreq = afs_alloc_read(GFP_NOFS);
- if (!fsreq)
- return netfs_subreq_terminated(subreq, -ENOMEM, false);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- fsreq->subreq = subreq;
- fsreq->pos = subreq->start + subreq->transferred;
- fsreq->len = subreq->len - subreq->transferred;
- fsreq->key = key_get(subreq->rreq->netfs_priv);
- fsreq->vnode = vnode;
- fsreq->iter = &fsreq->def_iter;
+ if (subreq->rreq->origin == NETFS_READAHEAD ||
+ subreq->rreq->iocb) {
+ op->flags |= AFS_OPERATION_ASYNC;
- iov_iter_xarray(&fsreq->def_iter, ITER_DEST,
- &fsreq->vnode->netfs.inode.i_mapping->i_pages,
- fsreq->pos, fsreq->len);
+ if (!afs_begin_vnode_operation(op)) {
+ subreq->error = afs_put_operation(op);
+ netfs_read_subreq_terminated(subreq);
+ return;
+ }
- afs_fetch_data(fsreq->vnode, fsreq);
- afs_put_read(fsreq);
-}
+ if (!afs_select_fileserver(op)) {
+ afs_end_read(op);
+ return;
+ }
-static int afs_symlink_read_folio(struct file *file, struct folio *folio)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host);
- struct afs_read *fsreq;
- int ret;
-
- fsreq = afs_alloc_read(GFP_NOFS);
- if (!fsreq)
- return -ENOMEM;
-
- fsreq->pos = folio_pos(folio);
- fsreq->len = folio_size(folio);
- fsreq->vnode = vnode;
- fsreq->iter = &fsreq->def_iter;
- iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages,
- fsreq->pos, fsreq->len);
-
- ret = afs_fetch_data(fsreq->vnode, fsreq);
- if (ret == 0)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return ret;
+ afs_issue_read_call(op);
+ } else {
+ afs_do_sync_operation(op);
+ }
}
static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- rreq->netfs_priv = key_get(afs_file_key(file));
- return 0;
-}
-
-static int afs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-#ifdef CONFIG_AFS_FSCACHE
struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
- return fscache_begin_read_operation(&rreq->cache_resources,
- afs_vnode_cache(vnode));
-#else
- return -ENOBUFS;
-#endif
+ if (file)
+ rreq->netfs_priv = key_get(afs_file_key(file));
+ rreq->rsize = 256 * 1024;
+ rreq->wsize = 256 * 1024 * 1024;
+
+ switch (rreq->origin) {
+ case NETFS_READ_SINGLE:
+ if (!file) {
+ struct key *key = afs_request_key(vnode->volume->cell);
+
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ rreq->netfs_priv = key;
+ }
+ break;
+ case NETFS_WRITEBACK:
+ case NETFS_WRITETHROUGH:
+ case NETFS_UNBUFFERED_WRITE:
+ case NETFS_DIO_WRITE:
+ if (S_ISREG(rreq->inode->i_mode))
+ rreq->io_streams[0].avail = true;
+ break;
+ case NETFS_WRITEBACK_SINGLE:
+ default:
+ break;
+ }
+ return 0;
}
static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
@@ -386,171 +421,87 @@ static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
static void afs_free_request(struct netfs_io_request *rreq)
{
key_put(rreq->netfs_priv);
+ afs_put_wb_key(rreq->netfs_priv2);
}
-const struct netfs_request_ops afs_req_ops = {
- .init_request = afs_init_request,
- .free_request = afs_free_request,
- .begin_cache_operation = afs_begin_cache_operation,
- .check_write_begin = afs_check_write_begin,
- .issue_read = afs_issue_read,
-};
-
-int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode)));
- return 0;
-}
-
-/*
- * Adjust the dirty region of the page on truncation or full invalidation,
- * getting rid of the markers altogether if the region is entirely invalidated.
- */
-static void afs_invalidate_dirty(struct folio *folio, size_t offset,
- size_t length)
+static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
{
- struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
- unsigned long priv;
- unsigned int f, t, end = offset + length;
-
- priv = (unsigned long)folio_get_private(folio);
-
- /* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == folio_size(folio))
- goto full_invalidate;
-
- /* If the page was dirtied by page_mkwrite(), the PTE stays writable
- * and we don't get another notification to tell us to expand it
- * again.
- */
- if (afs_is_folio_dirty_mmapped(priv))
- return;
-
- /* We may need to shorten the dirty region */
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
-
- if (t <= offset || f >= end)
- return; /* Doesn't overlap */
-
- if (f < offset && t > end)
- return; /* Splits the dirty region - just absorb it */
-
- if (f >= offset && t <= end)
- goto undirty;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ loff_t i_size;
- if (f < offset)
- t = offset;
- else
- f = end;
- if (f == t)
- goto undirty;
-
- priv = afs_folio_dirty(folio, f, t);
- folio_change_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
- return;
-
-undirty:
- trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
- folio_clear_dirty_for_io(folio);
-full_invalidate:
- trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
- folio_detach_private(folio);
+ write_seqlock(&vnode->cb_lock);
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (new_i_size > i_size) {
+ i_size_write(&vnode->netfs.inode, new_i_size);
+ inode_set_bytes(&vnode->netfs.inode, new_i_size);
+ }
+ write_sequnlock(&vnode->cb_lock);
+ fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
}
-/*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- * the entire page)
- */
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
- size_t length)
+static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
{
- _enter("{%lu},%zu,%zu", folio->index, offset, length);
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
- BUG_ON(!folio_test_locked(folio));
-
- if (folio_get_private(folio))
- afs_invalidate_dirty(folio, offset, length);
-
- folio_wait_fscache(folio);
- _leave("");
+ afs_invalidate_cache(vnode, 0);
}
-/*
- * release a page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
- */
-static bool afs_release_folio(struct folio *folio, gfp_t gfp)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{{%llx:%llu}[%lu],%lx},%x",
- vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
- gfp);
-
- /* deny if folio is being written to the cache and the caller hasn't
- * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
- if (folio_test_fscache(folio)) {
- if (current_is_kswapd() || !(gfp & __GFP_FS))
- return false;
- folio_wait_fscache(folio);
- }
- fscache_note_page_release(afs_vnode_cache(vnode));
-#endif
-
- if (folio_test_private(folio)) {
- trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
- folio_detach_private(folio);
- }
-
- /* Indicate that the folio can be released */
- _leave(" = T");
- return true;
-}
+const struct netfs_request_ops afs_req_ops = {
+ .init_request = afs_init_request,
+ .free_request = afs_free_request,
+ .check_write_begin = afs_check_write_begin,
+ .issue_read = afs_issue_read,
+ .update_i_size = afs_update_i_size,
+ .invalidate_cache = afs_netfs_invalidate_cache,
+ .begin_writeback = afs_begin_writeback,
+ .prepare_write = afs_prepare_write,
+ .issue_write = afs_issue_write,
+ .retry_request = afs_retry_request,
+};
static void afs_add_open_mmap(struct afs_vnode *vnode)
{
if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
- down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ down_write(&vnode->volume->open_mmaps_lock);
if (list_empty(&vnode->cb_mmap_link))
- list_add_tail(&vnode->cb_mmap_link,
- &vnode->volume->cell->fs_open_mmaps);
+ list_add_tail(&vnode->cb_mmap_link, &vnode->volume->open_mmaps);
- up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ up_write(&vnode->volume->open_mmaps_lock);
}
}
static void afs_drop_open_mmap(struct afs_vnode *vnode)
{
- if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
+ if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1))
return;
- down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ down_write(&vnode->volume->open_mmaps_lock);
- if (atomic_read(&vnode->cb_nr_mmap) == 0)
+ read_seqlock_excl(&vnode->cb_lock);
+ // the only place where ->cb_nr_mmap may hit 0
+ // see __afs_break_callback() for the other side...
+ if (atomic_dec_and_test(&vnode->cb_nr_mmap))
list_del_init(&vnode->cb_mmap_link);
+ read_sequnlock_excl(&vnode->cb_lock);
- up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ up_write(&vnode->volume->open_mmaps_lock);
flush_work(&vnode->cb_work);
}
/*
* Handle setting up a memory mapping on an AFS file.
*/
-static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int afs_file_mmap_prepare(struct vm_area_desc *desc)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file));
int ret;
afs_add_open_mmap(vnode);
- ret = generic_file_mmap(file, vma);
+ ret = generic_file_mmap_prepare(desc);
if (ret == 0)
- vma->vm_ops = &afs_vm_ops;
+ desc->vm_ops = &afs_vm_ops;
else
afs_drop_open_mmap(vnode);
return ret;
@@ -569,31 +520,47 @@ static void afs_vm_close(struct vm_area_struct *vma)
static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
- struct afs_file *af = vmf->vma->vm_file->private_data;
- switch (afs_validate(vnode, af->key)) {
- case 0:
+ if (afs_check_validity(vnode))
return filemap_map_pages(vmf, start_pgoff, end_pgoff);
- case -ENOMEM:
- return VM_FAULT_OOM;
- case -EINTR:
- case -ERESTARTSYS:
- return VM_FAULT_RETRY;
- case -ESTALE:
- default:
- return VM_FAULT_SIGBUS;
- }
+ return 0;
}
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_file *af = iocb->ki_filp->private_data;
- int ret;
+ ssize_t ret;
- ret = afs_validate(vnode, af->key);
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return netfs_unbuffered_read_iter(iocb, iter);
+
+ ret = netfs_start_io_read(inode);
if (ret < 0)
return ret;
+ ret = afs_validate(vnode, af->key);
+ if (ret == 0)
+ ret = filemap_read(iocb, iter, 0);
+ netfs_end_io_read(inode);
+ return ret;
+}
- return generic_file_read_iter(iocb, iter);
+static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct inode *inode = file_inode(in);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_file *af = in->private_data;
+ ssize_t ret;
+
+ ret = netfs_start_io_read(inode);
+ if (ret < 0)
+ return ret;
+ ret = afs_validate(vnode, af->key);
+ if (ret == 0)
+ ret = filemap_splice_read(in, ppos, pipe, len, flags);
+ netfs_end_io_read(inode);
+ return ret;
}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index bbcc5afd1576..f0e96a35093f 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -93,13 +93,13 @@ static void afs_grant_locks(struct afs_vnode *vnode)
bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
- if (!exclusive && p->fl_type == F_WRLCK)
+ if (!exclusive && lock_is_write(p))
continue;
list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
p->fl_u.afs.state = AFS_LOCK_GRANTED;
trace_afs_flock_op(vnode, p, afs_flock_op_grant);
- wake_up(&p->fl_wait);
+ locks_wake_up(p);
}
}
@@ -112,25 +112,24 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
{
struct file_lock *p, *_p, *next = NULL;
struct key *key = vnode->lock_key;
- unsigned int fl_type = F_RDLCK;
+ unsigned int type = F_RDLCK;
_enter("");
if (vnode->lock_type == AFS_LOCK_WRITE)
- fl_type = F_WRLCK;
+ type = F_WRLCK;
list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
if (error &&
- p->fl_type == fl_type &&
- afs_file_key(p->fl_file) == key) {
+ p->c.flc_type == type &&
+ afs_file_key(p->c.flc_file) == key) {
list_del_init(&p->fl_u.afs.link);
p->fl_u.afs.state = error;
- wake_up(&p->fl_wait);
+ locks_wake_up(p);
}
/* Select the next locker to hand off to. */
- if (next &&
- (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK))
+ if (next && (lock_is_write(next) || lock_is_read(p)))
continue;
next = p;
}
@@ -142,7 +141,7 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
trace_afs_flock_op(vnode, next, afs_flock_op_wake);
- wake_up(&next->fl_wait);
+ locks_wake_up(next);
} else {
afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
@@ -166,7 +165,7 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
struct file_lock, fl_u.afs.link);
list_del_init(&p->fl_u.afs.link);
p->fl_u.afs.state = -ENOENT;
- wake_up(&p->fl_wait);
+ locks_wake_up(p);
}
key_put(vnode->lock_key);
@@ -451,7 +450,7 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
*/
static int afs_do_setlk(struct file *file, struct file_lock *fl)
{
- struct inode *inode = locks_inode(file);
+ struct inode *inode = file_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
afs_lock_type_t type;
@@ -464,14 +463,14 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
_enter("{%llx:%llu},%llu-%llu,%u,%u",
vnode->fid.vid, vnode->fid.vnode,
- fl->fl_start, fl->fl_end, fl->fl_type, mode);
+ fl->fl_start, fl->fl_end, fl->c.flc_type, mode);
fl->fl_ops = &afs_lock_ops;
INIT_LIST_HEAD(&fl->fl_u.afs.link);
fl->fl_u.afs.state = AFS_LOCK_PENDING;
partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
- type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+ type = lock_is_read(fl) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
if (mode == afs_flock_mode_write && partial)
type = AFS_LOCK_WRITE;
@@ -524,7 +523,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
}
if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
- !(fl->fl_flags & FL_SLEEP)) {
+ !(fl->c.flc_flags & FL_SLEEP)) {
ret = -EAGAIN;
if (type == AFS_LOCK_READ) {
if (vnode->status.lock_count == -1)
@@ -621,7 +620,7 @@ skip_server_lock:
return 0;
lock_is_contended:
- if (!(fl->fl_flags & FL_SLEEP)) {
+ if (!(fl->c.flc_flags & FL_SLEEP)) {
list_del_init(&fl->fl_u.afs.link);
afs_next_locker(vnode, 0);
ret = -EAGAIN;
@@ -641,7 +640,7 @@ need_to_wait:
spin_unlock(&vnode->lock);
trace_afs_flock_ev(vnode, fl, afs_flock_waiting, 0);
- ret = wait_event_interruptible(fl->fl_wait,
+ ret = wait_event_interruptible(fl->c.flc_wait,
fl->fl_u.afs.state != AFS_LOCK_PENDING);
trace_afs_flock_ev(vnode, fl, afs_flock_waited, ret);
@@ -701,10 +700,11 @@ error:
*/
static int afs_do_unlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
int ret;
- _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+ _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode,
+ fl->c.flc_type);
trace_afs_flock_op(vnode, fl, afs_flock_op_unlock);
@@ -721,7 +721,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
*/
static int afs_do_getlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct key *key = afs_file_key(file);
int ret, lock_count;
@@ -730,11 +730,11 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
return -ENOENT;
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
/* check local lock records first */
posix_test_lock(file, fl);
- if (fl->fl_type == F_UNLCK) {
+ if (lock_is_unlock(fl)) {
/* no local locks; consult the server */
ret = afs_fetch_status(vnode, key, false, NULL);
if (ret < 0)
@@ -743,18 +743,18 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
lock_count = READ_ONCE(vnode->status.lock_count);
if (lock_count != 0) {
if (lock_count > 0)
- fl->fl_type = F_RDLCK;
+ fl->c.flc_type = F_RDLCK;
else
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
fl->fl_start = 0;
fl->fl_end = OFFSET_MAX;
- fl->fl_pid = 0;
+ fl->c.flc_pid = 0;
}
}
ret = 0;
error:
- _leave(" = %d [%hd]", ret, fl->fl_type);
+ _leave(" = %d [%hd]", ret, fl->c.flc_type);
return ret;
}
@@ -763,13 +763,13 @@ error:
*/
int afs_lock(struct file *file, int cmd, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
enum afs_flock_operation op;
int ret;
_enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
vnode->fid.vid, vnode->fid.vnode, cmd,
- fl->fl_type, fl->fl_flags,
+ fl->c.flc_type, fl->c.flc_flags,
(long long) fl->fl_start, (long long) fl->fl_end);
if (IS_GETLK(cmd))
@@ -778,7 +778,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
trace_afs_flock_op(vnode, fl, afs_flock_op_lock);
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
ret = afs_do_unlk(file, fl);
else
ret = afs_do_setlk(file, fl);
@@ -798,13 +798,13 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
*/
int afs_flock(struct file *file, int cmd, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
enum afs_flock_operation op;
int ret;
_enter("{%llx:%llu},%d,{t=%x,fl=%x}",
vnode->fid.vid, vnode->fid.vnode, cmd,
- fl->fl_type, fl->fl_flags);
+ fl->c.flc_type, fl->c.flc_flags);
/*
* No BSD flocks over NFS allowed.
@@ -813,14 +813,14 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
* Not sure whether that would be unique, though, or whether
* that would break in other places.
*/
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
trace_afs_flock_op(vnode, fl, afs_flock_op_flock);
/* we're simulating flock() locks using posix locks on the server */
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
ret = afs_do_unlk(file, fl);
else
ret = afs_do_setlk(file, fl);
@@ -843,7 +843,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
*/
static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->c.flc_file));
_enter("");
@@ -861,7 +861,7 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
*/
static void afs_fl_release_private(struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->c.flc_file));
_enter("");
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 7a3803ce3a22..8418813ee043 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -35,18 +35,119 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo
key_get(key);
}
- op->key = key;
- op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op);
- op->net = volume->cell->net;
- op->cb_v_break = volume->cb_v_break;
- op->debug_id = atomic_inc_return(&afs_operation_debug_counter);
- op->error = -EDESTADDRREQ;
- op->ac.error = SHRT_MAX;
+ op->key = key;
+ op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op);
+ op->net = volume->cell->net;
+ op->cb_v_break = atomic_read(&volume->cb_v_break);
+ op->pre_volsync.creation = volume->creation_time;
+ op->pre_volsync.update = volume->update_time;
+ op->debug_id = atomic_inc_return(&afs_operation_debug_counter);
+ op->nr_iterations = -1;
+ afs_op_set_error(op, -EDESTADDRREQ);
_leave(" = [op=%08x]", op->debug_id);
return op;
}
+struct afs_io_locker {
+ struct list_head link;
+ struct task_struct *task;
+ unsigned long have_lock;
+};
+
+/*
+ * Unlock the I/O lock on a vnode.
+ */
+static void afs_unlock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker *locker;
+
+ spin_lock(&vnode->lock);
+ locker = list_first_entry_or_null(&vnode->io_lock_waiters,
+ struct afs_io_locker, link);
+ if (locker) {
+ list_del(&locker->link);
+ smp_store_release(&locker->have_lock, 1); /* The unlock barrier. */
+ smp_mb__after_atomic(); /* Store have_lock before task state */
+ wake_up_process(locker->task);
+ } else {
+ clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
+ }
+ spin_unlock(&vnode->lock);
+}
+
+/*
+ * Lock the I/O lock on a vnode uninterruptibly. We can't use an ordinary
+ * mutex as lockdep will complain if we unlock it in the wrong thread.
+ */
+static void afs_lock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, };
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock);
+ return;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters);
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (smp_load_acquire(&myself.have_lock)) /* The lock barrier */
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Lock the I/O lock on a vnode interruptibly. We can't use an ordinary mutex
+ * as lockdep will complain if we unlock it in the wrong thread.
+ */
+static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, };
+ int ret = 0;
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock);
+ return 0;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters);
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (smp_load_acquire(&myself.have_lock) || /* The lock barrier */
+ signal_pending(current))
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* If we got a signal, try to transfer the lock onto the next
+ * waiter.
+ */
+ if (unlikely(signal_pending(current))) {
+ spin_lock(&vnode->lock);
+ if (myself.have_lock) {
+ spin_unlock(&vnode->lock);
+ afs_unlock_for_io(vnode);
+ } else {
+ list_del(&myself.link);
+ spin_unlock(&vnode->lock);
+ }
+ ret = -ERESTARTSYS;
+ }
+ return ret;
+}
+
/*
* Lock the vnode(s) being operated upon.
*/
@@ -58,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_UNINTR) {
- mutex_lock(&vnode->io_lock);
+ afs_lock_for_io(vnode);
op->flags |= AFS_OPERATION_LOCK_0;
_leave(" = t [1]");
return true;
@@ -70,8 +171,8 @@ static bool afs_get_io_locks(struct afs_operation *op)
if (vnode2 > vnode)
swap(vnode, vnode2);
- if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
- op->error = -ERESTARTSYS;
+ if (afs_lock_for_io_interruptible(vnode) < 0) {
+ afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
_leave(" = f [I 0]");
return false;
@@ -79,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op)
op->flags |= AFS_OPERATION_LOCK_0;
if (vnode2) {
- if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
- op->error = -ERESTARTSYS;
+ if (afs_lock_for_io_interruptible(vnode2) < 0) {
+ afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
op->flags &= ~AFS_OPERATION_LOCK_0;
_leave(" = f [I 1]");
return false;
@@ -102,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_LOCK_1)
- mutex_unlock(&vnode2->io_lock);
+ afs_unlock_for_io(vnode2);
if (op->flags & AFS_OPERATION_LOCK_0)
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
}
static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
@@ -147,7 +248,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
afs_prepare_vnode(op, &op->file[0], 0);
afs_prepare_vnode(op, &op->file[1], 1);
- op->cb_v_break = op->volume->cb_v_break;
+ op->cb_v_break = atomic_read(&op->volume->cb_v_break);
_leave(" = true");
return true;
}
@@ -155,20 +256,20 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
/*
* Tidy up a filesystem cursor and unlock the vnode.
*/
-static void afs_end_vnode_operation(struct afs_operation *op)
+void afs_end_vnode_operation(struct afs_operation *op)
{
_enter("");
- if (op->error == -EDESTADDRREQ ||
- op->error == -EADDRNOTAVAIL ||
- op->error == -ENETUNREACH ||
- op->error == -EHOSTUNREACH)
+ switch (afs_op_error(op)) {
+ case -EDESTADDRREQ:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
afs_dump_edestaddrreq(op);
+ break;
+ }
afs_drop_io_locks(op);
-
- if (op->error == -ECONNABORTED)
- op->error = afs_abort_to_error(op->ac.abort_code);
}
/*
@@ -179,37 +280,43 @@ void afs_wait_for_operation(struct afs_operation *op)
_enter("");
while (afs_select_fileserver(op)) {
- op->cb_s_break = op->server->cb_s_break;
+ op->call_responded = false;
+ op->call_error = 0;
+ op->call_abort_code = 0;
if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
op->ops->issue_yfs_rpc)
op->ops->issue_yfs_rpc(op);
else if (op->ops->issue_afs_rpc)
op->ops->issue_afs_rpc(op);
else
- op->ac.error = -ENOTSUPP;
-
- if (op->call)
- op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
+ op->call_error = -ENOTSUPP;
+
+ if (op->call) {
+ afs_wait_for_call_to_complete(op->call);
+ op->call_abort_code = op->call->abort_code;
+ op->call_error = op->call->error;
+ op->call_responded = op->call->responded;
+ afs_put_call(op->call);
+ }
}
- switch (op->error) {
- case 0:
+ if (op->call_responded && op->server)
+ set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+ if (!afs_op_error(op)) {
_debug("success");
op->ops->success(op);
- break;
- case -ECONNABORTED:
+ } else if (op->cumul_error.aborted) {
if (op->ops->aborted)
op->ops->aborted(op);
- fallthrough;
- default:
+ } else {
if (op->ops->failed)
op->ops->failed(op);
- break;
}
afs_end_vnode_operation(op);
- if (op->error == 0 && op->ops->edit_dir) {
+ if (!afs_op_error(op) && op->ops->edit_dir) {
_debug("edit_dir");
op->ops->edit_dir(op);
}
@@ -221,7 +328,8 @@ void afs_wait_for_operation(struct afs_operation *op)
*/
int afs_put_operation(struct afs_operation *op)
{
- int i, ret = op->error;
+ struct afs_addr_list *alist;
+ int i, ret = afs_op_error(op);
_enter("op=%08x,%d", op->debug_id, ret);
@@ -243,9 +351,19 @@ int afs_put_operation(struct afs_operation *op)
kfree(op->more_files);
}
- afs_end_cursor(&op->ac);
+ if (op->estate) {
+ alist = op->estate->addresses;
+ if (alist) {
+ if (op->call_responded &&
+ op->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &op->addr_tried))
+ WRITE_ONCE(alist->preferred, op->addr_index);
+ }
+ }
+
+ afs_clear_server_states(op);
afs_put_serverlist(op->net, op->server_list);
- afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op);
+ afs_put_volume(op->volume, afs_volume_trace_put_put_op);
key_put(op->key);
kfree(op);
return ret;
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index daaf3810cc92..e0030ac74ea0 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -15,6 +15,42 @@
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
+struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
+ enum afs_estate_trace where)
+{
+ if (estate) {
+ int r;
+
+ __refcount_inc(&estate->ref, &r);
+ trace_afs_estate(estate->server_id, estate->probe_seq, r, where);
+ }
+ return estate;
+}
+
+static void afs_endpoint_state_rcu(struct rcu_head *rcu)
+{
+ struct afs_endpoint_state *estate = container_of(rcu, struct afs_endpoint_state, rcu);
+
+ trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+ afs_estate_trace_free);
+ afs_put_addrlist(estate->addresses, afs_alist_trace_put_estate);
+ kfree(estate);
+}
+
+void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where)
+{
+ if (estate) {
+ unsigned int server_id = estate->server_id, probe_seq = estate->probe_seq;
+ bool dead;
+ int r;
+
+ dead = __refcount_dec_and_test(&estate->ref, &r);
+ trace_afs_estate(server_id, probe_seq, r, where);
+ if (dead)
+ call_rcu(&estate->rcu, afs_endpoint_state_rcu);
+ }
+}
+
/*
* Start the probe polling timer. We have to supply it with an inc on the
* outstanding server count.
@@ -38,9 +74,10 @@ static void afs_schedule_fs_probe(struct afs_net *net,
/*
* Handle the completion of a set of probes.
*/
-static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
+static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server,
+ struct afs_endpoint_state *estate)
{
- bool responded = server->probe.responded;
+ bool responded = test_bit(AFS_ESTATE_RESPONDED, &estate->flags);
write_seqlock(&net->fs_lock);
if (responded) {
@@ -50,6 +87,7 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server
clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
list_add_tail(&server->probe_link, &net->fs_probe_fast);
}
+
write_sequnlock(&net->fs_lock);
afs_schedule_fs_probe(net, server, !responded);
@@ -58,12 +96,13 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server
/*
* Handle the completion of a probe.
*/
-static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
+static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server,
+ struct afs_endpoint_state *estate)
{
_enter("");
- if (atomic_dec_and_test(&server->probe_outstanding))
- afs_finished_fs_probe(net, server);
+ if (atomic_dec_and_test(&estate->nr_probing))
+ afs_finished_fs_probe(net, server, estate);
wake_up_all(&server->probe_wq);
}
@@ -74,24 +113,22 @@ static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server
*/
static void afs_fs_probe_not_done(struct afs_net *net,
struct afs_server *server,
- struct afs_addr_cursor *ac)
+ struct afs_endpoint_state *estate,
+ int index)
{
- struct afs_addr_list *alist = ac->alist;
- unsigned int index = ac->index;
-
_enter("");
trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
spin_lock(&server->probe_lock);
- server->probe.local_failure = true;
- if (server->probe.error == 0)
- server->probe.error = -ENOMEM;
+ set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
+ if (estate->error == 0)
+ estate->error = -ENOMEM;
- set_bit(index, &alist->failed);
+ set_bit(index, &estate->failed_set);
spin_unlock(&server->probe_lock);
- return afs_done_one_fs_probe(net, server);
+ return afs_done_one_fs_probe(net, server, estate);
}
/*
@@ -100,30 +137,34 @@ static void afs_fs_probe_not_done(struct afs_net *net,
*/
void afs_fileserver_probe_result(struct afs_call *call)
{
- struct afs_addr_list *alist = call->alist;
+ struct afs_endpoint_state *estate = call->probe;
+ struct afs_addr_list *alist = estate->addresses;
+ struct afs_address *addr = &alist->addrs[call->probe_index];
struct afs_server *server = call->server;
- unsigned int index = call->addr_ix;
- unsigned int rtt_us = 0, cap0;
+ unsigned int index = call->probe_index;
+ unsigned int rtt_us = -1, cap0;
int ret = call->error;
_enter("%pU,%u", &server->uuid, index);
+ WRITE_ONCE(addr->last_error, ret);
+
spin_lock(&server->probe_lock);
switch (ret) {
case 0:
- server->probe.error = 0;
+ estate->error = 0;
goto responded;
case -ECONNABORTED:
- if (!server->probe.responded) {
- server->probe.abort_code = call->abort_code;
- server->probe.error = ret;
+ if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) {
+ estate->abort_code = call->abort_code;
+ estate->error = ret;
}
goto responded;
case -ENOMEM:
case -ENONET:
- clear_bit(index, &alist->responded);
- server->probe.local_failure = true;
+ clear_bit(index, &estate->responsive_set);
+ set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
@@ -136,29 +177,29 @@ void afs_fileserver_probe_result(struct afs_call *call)
case -ETIMEDOUT:
case -ETIME:
default:
- clear_bit(index, &alist->responded);
- set_bit(index, &alist->failed);
- if (!server->probe.responded &&
- (server->probe.error == 0 ||
- server->probe.error == -ETIMEDOUT ||
- server->probe.error == -ETIME))
- server->probe.error = ret;
+ clear_bit(index, &estate->responsive_set);
+ set_bit(index, &estate->failed_set);
+ if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags) &&
+ (estate->error == 0 ||
+ estate->error == -ETIMEDOUT ||
+ estate->error == -ETIME))
+ estate->error = ret;
trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
goto out;
}
responded:
- clear_bit(index, &alist->failed);
+ clear_bit(index, &estate->failed_set);
if (call->service_id == YFS_FS_SERVICE) {
- server->probe.is_yfs = true;
+ set_bit(AFS_ESTATE_IS_YFS, &estate->flags);
set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
} else {
- server->probe.not_yfs = true;
- if (!server->probe.is_yfs) {
+ set_bit(AFS_ESTATE_NOT_YFS, &estate->flags);
+ if (!test_bit(AFS_ESTATE_IS_YFS, &estate->flags)) {
clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
}
cap0 = ntohl(call->tmp);
if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
@@ -167,116 +208,148 @@ responded:
clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
}
- rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
- if (rtt_us < server->probe.rtt) {
- server->probe.rtt = rtt_us;
+ rtt_us = rxrpc_kernel_get_srtt(addr->peer);
+ if (rtt_us < estate->rtt) {
+ estate->rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
}
smp_wmb(); /* Set rtt before responded. */
- server->probe.responded = true;
- set_bit(index, &alist->responded);
+ set_bit(AFS_ESTATE_RESPONDED, &estate->flags);
+ set_bit(index, &estate->responsive_set);
set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
spin_unlock(&server->probe_lock);
- _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
- &server->uuid, index, &alist->addrs[index].transport,
+ trace_afs_fs_probe(server, false, estate, index, call->error, call->abort_code, rtt_us);
+ _debug("probe[%x] %pU [%u] %pISpc rtt=%d ret=%d",
+ estate->probe_seq, &server->uuid, index,
+ rxrpc_kernel_remote_addr(alist->addrs[index].peer),
rtt_us, ret);
- return afs_done_one_fs_probe(call->net, server);
+ return afs_done_one_fs_probe(call->net, server, estate);
}
/*
- * Probe one or all of a fileserver's addresses to find out the best route and
- * to query its capabilities.
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
*/
-void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
- struct key *key, bool all)
+int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+ struct afs_addr_list *new_alist, struct key *key)
{
- struct afs_addr_cursor ac = {
- .index = 0,
- };
+ struct afs_endpoint_state *estate, *old;
+ struct afs_addr_list *old_alist = NULL, *alist;
+ unsigned long unprobed;
_enter("%pU", &server->uuid);
- read_lock(&server->fs_lock);
- ac.alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->fs_lock));
- afs_get_addrlist(ac.alist);
- read_unlock(&server->fs_lock);
+ estate = kzalloc(sizeof(*estate), GFP_KERNEL);
+ if (!estate)
+ return -ENOMEM;
+
+ refcount_set(&estate->ref, 2);
+ estate->server_id = server->debug_id;
+ estate->rtt = UINT_MAX;
+
+ write_lock(&server->fs_lock);
+
+ old = rcu_dereference_protected(server->endpoint_state,
+ lockdep_is_held(&server->fs_lock));
+ if (old) {
+ estate->responsive_set = old->responsive_set;
+ if (!new_alist)
+ new_alist = old->addresses;
+ }
+
+ if (old_alist != new_alist)
+ afs_set_peer_appdata(server, old_alist, new_alist);
+
+ estate->addresses = afs_get_addrlist(new_alist, afs_alist_trace_get_estate);
+ alist = estate->addresses;
+ estate->probe_seq = ++server->probe_counter;
+ atomic_set(&estate->nr_probing, alist->nr_addrs);
+
+ if (new_alist)
+ server->addr_version = new_alist->version;
+ rcu_assign_pointer(server->endpoint_state, estate);
+ write_unlock(&server->fs_lock);
+ if (old)
+ set_bit(AFS_ESTATE_SUPERSEDED, &old->flags);
+
+ trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+ afs_estate_trace_alloc_probe);
+
+ afs_get_address_preferences(net, new_alist);
server->probed_at = jiffies;
- atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
- memset(&server->probe, 0, sizeof(server->probe));
- server->probe.rtt = UINT_MAX;
-
- ac.index = ac.alist->preferred;
- if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
- all = true;
-
- if (all) {
- for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
- if (!afs_fs_get_capabilities(net, server, &ac, key))
- afs_fs_probe_not_done(net, server, &ac);
- } else {
- if (!afs_fs_get_capabilities(net, server, &ac, key))
- afs_fs_probe_not_done(net, server, &ac);
+ unprobed = (1UL << alist->nr_addrs) - 1;
+ while (unprobed) {
+ unsigned int index = 0, i;
+ int best_prio = -1;
+
+ for (i = 0; i < alist->nr_addrs; i++) {
+ if (test_bit(i, &unprobed) &&
+ alist->addrs[i].prio > best_prio) {
+ index = i;
+ best_prio = alist->addrs[i].prio;
+ }
+ }
+ __clear_bit(index, &unprobed);
+
+ trace_afs_fs_probe(server, true, estate, index, 0, 0, 0);
+ if (!afs_fs_get_capabilities(net, server, estate, index, key))
+ afs_fs_probe_not_done(net, server, estate, index);
}
- afs_put_addrlist(ac.alist);
+ afs_put_endpoint_state(old, afs_estate_trace_put_probe);
+ afs_put_endpoint_state(estate, afs_estate_trace_put_probe);
+ return 0;
}
/*
- * Wait for the first as-yet untried fileserver to respond.
+ * Wait for the first as-yet untried fileserver to respond, for the probe state
+ * to be superseded or for all probes to finish.
*/
-int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr)
{
- struct wait_queue_entry *waits;
- struct afs_server *server;
- unsigned int rtt = UINT_MAX, rtt_s;
- bool have_responders = false;
- int pref = -1, i;
+ struct afs_endpoint_state *estate;
+ struct afs_server_list *slist = op->server_list;
+ bool still_probing = true;
+ int ret = 0, i;
- _enter("%u,%lx", slist->nr_servers, untried);
+ _enter("%u", slist->nr_servers);
- /* Only wait for servers that have a probe outstanding. */
for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- if (!atomic_read(&server->probe_outstanding))
- __clear_bit(i, &untried);
- if (server->probe.responded)
- have_responders = true;
- }
+ estate = states[i].endpoint_state;
+ if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
+ return 2;
+ if (atomic_read(&estate->nr_probing))
+ still_probing = true;
+ if (estate->responsive_set & states[i].untried_addrs)
+ return 1;
}
- if (have_responders || !untried)
+ if (!still_probing)
return 0;
- waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
- if (!waits)
- return -ENOMEM;
-
- for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- init_waitqueue_entry(&waits[i], current);
- add_wait_queue(&server->probe_wq, &waits[i]);
- }
- }
+ for (i = 0; i < slist->nr_servers; i++)
+ add_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);
for (;;) {
- bool still_probing = false;
+ still_probing = false;
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- if (server->probe.responded)
- goto stop;
- if (atomic_read(&server->probe_outstanding))
- still_probing = true;
+ estate = states[i].endpoint_state;
+ if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) {
+ ret = 2;
+ goto stop;
+ }
+ if (atomic_read(&estate->nr_probing))
+ still_probing = true;
+ if (estate->responsive_set & states[i].untried_addrs) {
+ ret = 1;
+ goto stop;
}
}
@@ -288,28 +361,12 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
stop:
set_current_state(TASK_RUNNING);
- for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- rtt_s = READ_ONCE(server->rtt);
- if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
- rtt_s < rtt) {
- pref = i;
- rtt = rtt_s;
- }
+ for (i = 0; i < slist->nr_servers; i++)
+ remove_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);
- remove_wait_queue(&server->probe_wq, &waits[i]);
- }
- }
-
- kfree(waits);
-
- if (pref == -1 && signal_pending(current))
- return -ERESTARTSYS;
-
- if (pref >= 0)
- slist->preferred = pref;
- return 0;
+ if (!ret && signal_pending(current))
+ ret = -ERESTARTSYS;
+ return ret;
}
/*
@@ -327,7 +384,7 @@ void afs_fs_probe_timer(struct timer_list *timer)
/*
* Dispatch a probe to a server.
*/
-static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
+static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server)
__releases(&net->fs_lock)
{
struct key *key = NULL;
@@ -340,7 +397,7 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server
afs_get_server(server, afs_server_trace_get_probe);
write_sequnlock(&net->fs_lock);
- afs_fs_probe_fileserver(net, server, key, all);
+ afs_fs_probe_fileserver(net, server, NULL, key);
afs_put_server(net, server, afs_server_trace_put_probe);
}
@@ -352,7 +409,7 @@ void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
{
write_seqlock(&net->fs_lock);
if (!list_empty(&server->probe_link))
- return afs_dispatch_fs_probe(net, server, true);
+ return afs_dispatch_fs_probe(net, server);
write_sequnlock(&net->fs_lock);
}
@@ -412,7 +469,7 @@ again:
_debug("probe %pU", &server->uuid);
if (server && (first_pass || !need_resched())) {
- afs_dispatch_fs_probe(net, server, server == fast);
+ afs_dispatch_fs_probe(net, server);
first_pass = false;
goto again;
}
@@ -436,12 +493,13 @@ again:
/*
* Wait for a probe on a particular fileserver to complete for 2s.
*/
-int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
+int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
+ unsigned long exclude, bool is_intr)
{
struct wait_queue_entry wait;
unsigned long timo = 2 * HZ;
- if (atomic_read(&server->probe_outstanding) == 0)
+ if (atomic_read(&estate->nr_probing) == 0)
goto dont_wait;
init_wait_entry(&wait, 0);
@@ -449,8 +507,9 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
prepare_to_wait_event(&server->probe_wq, &wait,
is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
if (timo == 0 ||
- server->probe.responded ||
- atomic_read(&server->probe_outstanding) == 0 ||
+ test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags) ||
+ (estate->responsive_set & ~exclude) ||
+ atomic_read(&estate->nr_probing) == 0 ||
(is_intr && signal_pending(current)))
break;
timo = schedule_timeout(timo);
@@ -459,8 +518,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
finish_wait(&server->probe_wq, &wait);
dont_wait:
- if (server->probe.responded)
+ if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
return 0;
+ if (estate->responsive_set & ~exclude)
+ return 1;
if (is_intr && signal_pending(current))
return -ERESTARTSYS;
if (timo == 0)
@@ -473,6 +534,6 @@ dont_wait:
*/
void afs_fs_probe_cleanup(struct afs_net *net)
{
- if (del_timer_sync(&net->fs_probe_timer))
+ if (timer_delete_sync(&net->fs_probe_timer))
afs_dec_servers_outstanding(net);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 7d37f63ef0f0..bc9556991d7c 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -290,6 +290,7 @@ void afs_fs_fetch_status(struct afs_operation *op)
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -300,18 +301,19 @@ void afs_fs_fetch_status(struct afs_operation *op)
static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
struct afs_operation *op = call->op;
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
const __be32 *bp;
+ size_t count_before;
int ret;
_enter("{%u,%zu,%zu/%llu}",
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
- req->actual_len);
+ call->remaining);
switch (call->unmarshall) {
case 0:
- req->actual_len = 0;
+ call->remaining = 0;
call->unmarshall++;
if (call->operation_ID == FSFETCHDATA64) {
afs_extract_to_tmp64(call);
@@ -321,8 +323,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
}
fallthrough;
- /* Extract the returned data length into
- * ->actual_len. This may indicate more or less data than was
+ /* Extract the returned data length into ->remaining.
+ * This may indicate more or less data than was
* requested will be returned.
*/
case 1:
@@ -331,38 +333,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
if (ret < 0)
return ret;
- req->actual_len = be64_to_cpu(call->tmp64);
- _debug("DATA length: %llu", req->actual_len);
+ call->remaining = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", call->remaining);
- if (req->actual_len == 0)
+ if (call->remaining == 0)
goto no_more_data;
- call->iter = req->iter;
- call->iov_len = min(req->actual_len, req->len);
+ call->iter = &subreq->io_iter;
+ call->iov_len = umin(call->remaining, subreq->len - subreq->transferred);
call->unmarshall++;
fallthrough;
/* extract the returned data */
case 2:
- _debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->actual_len);
+ count_before = call->iov_len;
+ _debug("extract data %zu/%llu", count_before, call->remaining);
ret = afs_extract_data(call, true);
+ subreq->transferred += count_before - call->iov_len;
+ call->remaining -= count_before - call->iov_len;
if (ret < 0)
return ret;
call->iter = &call->def_iter;
- if (req->actual_len <= req->len)
+ if (call->remaining)
goto no_more_data;
/* Discard any excess data the server gave us */
- afs_extract_discard(call, req->actual_len - req->len);
+ afs_extract_discard(call, call->remaining);
call->unmarshall = 3;
fallthrough;
case 3:
_debug("extract discard %zu/%llu",
- iov_iter_count(call->iter), req->actual_len - req->len);
+ iov_iter_count(call->iter), call->remaining);
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -384,8 +388,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
xdr_decode_AFSCallBack(&bp, call, &vp->scb);
xdr_decode_AFSVolSync(&bp, &op->volsync);
- req->data_version = vp->scb.status.data_version;
- req->file_size = vp->scb.status.size;
+ if (subreq->start + subreq->transferred >= vp->scb.status.size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
call->unmarshall++;
fallthrough;
@@ -404,14 +408,18 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
static const struct afs_call_type afs_RXFSFetchData = {
.name = "FS.FetchData",
.op = afs_FS_FetchData,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = afs_deliver_fs_fetch_data,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSFetchData64 = {
.name = "FS.FetchData64",
.op = afs_FS_FetchData64,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = afs_deliver_fs_fetch_data,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
@@ -420,8 +428,8 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
*/
static void afs_fs_fetch_data64(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
struct afs_call *call;
__be32 *bp;
@@ -431,17 +439,21 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
+ if (op->flags & AFS_OPERATION_ASYNC)
+ call->async = true;
+
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA64);
bp[1] = htonl(vp->fid.vid);
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
- bp[4] = htonl(upper_32_bits(req->pos));
- bp[5] = htonl(lower_32_bits(req->pos));
+ bp[4] = htonl(upper_32_bits(subreq->start + subreq->transferred));
+ bp[5] = htonl(lower_32_bits(subreq->start + subreq->transferred));
bp[6] = 0;
- bp[7] = htonl(lower_32_bits(req->len));
+ bp[7] = htonl(lower_32_bits(subreq->len - subreq->transferred));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -451,9 +463,9 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
*/
void afs_fs_fetch_data(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
- struct afs_read *req = op->fetch.req;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
@@ -465,17 +477,16 @@ void afs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- req->call_debug_id = call->debug_id;
-
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA);
bp[1] = htonl(vp->fid.vid);
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
- bp[4] = htonl(lower_32_bits(req->pos));
- bp[5] = htonl(lower_32_bits(req->len));
+ bp[4] = htonl(lower_32_bits(subreq->start + subreq->transferred));
+ bp[5] = htonl(lower_32_bits(subreq->len + subreq->transferred));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -559,6 +570,7 @@ void afs_fs_create_file(struct afs_operation *op)
*bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -612,6 +624,7 @@ void afs_fs_make_dir(struct afs_operation *op)
*bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -685,6 +698,7 @@ void afs_fs_remove_file(struct afs_operation *op)
bp = (void *) bp + padsz;
}
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -732,6 +746,7 @@ void afs_fs_remove_dir(struct afs_operation *op)
bp = (void *) bp + padsz;
}
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -812,6 +827,7 @@ void afs_fs_link(struct afs_operation *op)
*bp++ = htonl(vp->fid.vnode);
*bp++ = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call1(call, &vp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -907,6 +923,7 @@ void afs_fs_symlink(struct afs_operation *op)
*bp++ = htonl(S_IRWXUGO); /* unix mode */
*bp++ = 0; /* segment size */
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1003,6 +1020,7 @@ void afs_fs_rename(struct afs_operation *op)
bp = (void *) bp + n_padsz;
}
+ call->fid = orig_dvp->fid;
trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1090,6 +1108,7 @@ static void afs_fs_store_data64(struct afs_operation *op)
*bp++ = htonl(upper_32_bits(op->store.i_size));
*bp++ = htonl(lower_32_bits(op->store.i_size));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1140,6 +1159,7 @@ void afs_fs_store_data(struct afs_operation *op)
*bp++ = htonl(lower_32_bits(op->store.size));
*bp++ = htonl(lower_32_bits(op->store.i_size));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1206,6 +1226,7 @@ static void afs_fs_setattr_size64(struct afs_operation *op)
*bp++ = htonl(upper_32_bits(attr->ia_size)); /* new file length */
*bp++ = htonl(lower_32_bits(attr->ia_size));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1247,6 +1268,7 @@ static void afs_fs_setattr_size(struct afs_operation *op)
*bp++ = 0; /* size of write */
*bp++ = htonl(attr->ia_size); /* new file length */
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1283,6 +1305,7 @@ void afs_fs_setattr(struct afs_operation *op)
xdr_encode_AFS_StoreStatus(&bp, op->setattr.attr);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1446,6 +1469,7 @@ void afs_fs_get_volume_status(struct afs_operation *op)
bp[0] = htonl(FSGETVOLUMESTATUS);
bp[1] = htonl(vp->fid.vid);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1528,6 +1552,7 @@ void afs_fs_set_lock(struct afs_operation *op)
*bp++ = htonl(vp->fid.unique);
*bp++ = htonl(op->lock.type);
+ call->fid = vp->fid;
trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1554,6 +1579,7 @@ void afs_fs_extend_lock(struct afs_operation *op)
*bp++ = htonl(vp->fid.vnode);
*bp++ = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1580,6 +1606,7 @@ void afs_fs_release_lock(struct afs_operation *op)
*bp++ = htonl(vp->fid.vnode);
*bp++ = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1605,13 +1632,12 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
/*
* Flush all the callbacks we have on a server.
*/
-int afs_fs_give_up_all_callbacks(struct afs_net *net,
- struct afs_server *server,
- struct afs_addr_cursor *ac,
- struct key *key)
+int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
+ struct afs_address *addr, struct key *key)
{
struct afs_call *call;
__be32 *bp;
+ int ret;
_enter("");
@@ -1619,15 +1645,22 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
if (!call)
return -ENOMEM;
- call->key = key;
+ call->key = key;
+ call->peer = rxrpc_kernel_get_peer(addr->peer);
+ call->service_id = server->service_id;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(FSGIVEUPALLCALLBACKS);
- call->server = afs_use_server(server, afs_server_trace_give_up_cb);
- afs_make_call(ac, call, GFP_NOFS);
- return afs_wait_for_call_to_complete(call, ac);
+ call->server = afs_use_server(server, false, afs_server_trace_use_give_up_cb);
+ afs_make_call(call, GFP_NOFS);
+ afs_wait_for_call_to_complete(call);
+ ret = call->error;
+ if (call->responded)
+ set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+ afs_put_call(call);
+ return ret;
}
/*
@@ -1689,6 +1722,12 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
return 0;
}
+static void afs_fs_get_capabilities_destructor(struct afs_call *call)
+{
+ afs_put_endpoint_state(call->probe, afs_estate_trace_put_getcaps);
+ afs_flat_call_destructor(call);
+}
+
/*
* FS.GetCapabilities operation type
*/
@@ -1697,7 +1736,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
.done = afs_fileserver_probe_result,
- .destructor = afs_flat_call_destructor,
+ .immediate_cancel = afs_fileserver_probe_result,
+ .destructor = afs_fs_get_capabilities_destructor,
};
/*
@@ -1707,7 +1747,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
* ->done() - otherwise we return false to indicate we didn't even try.
*/
bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
- struct afs_addr_cursor *ac, struct key *key)
+ struct afs_endpoint_state *estate, unsigned int addr_index,
+ struct key *key)
{
struct afs_call *call;
__be32 *bp;
@@ -1718,10 +1759,14 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
if (!call)
return false;
- call->key = key;
- call->server = afs_use_server(server, afs_server_trace_get_caps);
- call->upgrade = true;
- call->async = true;
+ call->key = key;
+ call->server = afs_use_server(server, false, afs_server_trace_use_get_caps);
+ call->peer = rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer);
+ call->probe = afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps);
+ call->probe_index = addr_index;
+ call->service_id = server->service_id;
+ call->upgrade = true;
+ call->async = true;
call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
/* marshall the parameters */
@@ -1729,7 +1774,7 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
*bp++ = htonl(FSGETCAPABILITIES);
trace_afs_make_fs_call(call, NULL);
- afs_make_call(ac, call, GFP_NOFS);
+ afs_make_call(call, GFP_NOFS);
afs_put_call(call);
return true;
}
@@ -1853,7 +1898,10 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_AFSVolSync(&bp, &op->volsync);
+ /* Unfortunately, prior to OpenAFS-1.6, volsync here is filled
+ * with rubbish.
+ */
+ xdr_decode_AFSVolSync(&bp, NULL);
call->unmarshall++;
fallthrough;
@@ -1899,7 +1947,7 @@ void afs_fs_inline_bulk_status(struct afs_operation *op)
int i;
if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) {
- op->error = -ENOTSUPP;
+ afs_op_set_error(op, -ENOTSUPP);
return;
}
@@ -1928,6 +1976,7 @@ void afs_fs_inline_bulk_status(struct afs_operation *op)
*bp++ = htonl(op->more_files[i].fid.unique);
}
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -2033,6 +2082,7 @@ void afs_fs_fetch_acl(struct afs_operation *op)
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
@@ -2078,6 +2128,7 @@ void afs_fs_store_acl(struct afs_operation *op)
if (acl->size != size)
memset((void *)&bp[5] + acl->size, 0, size - acl->size);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6d3a3dbe4928..dde1857fcabb 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -25,8 +25,94 @@
#include "internal.h"
#include "afs_fs.h"
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
+{
+ size_t size = strlen(op->create.symlink) + 1;
+ size_t dsize = 0;
+ char *p;
+
+ if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
+ mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
+ return;
+
+ vnode->directory_size = dsize;
+ p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+ memcpy(p, op->create.symlink, size);
+ kunmap_local(p);
+ set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+}
+
+static void afs_put_link(void *arg)
+{
+ struct folio *folio = virt_to_folio(arg);
+
+ kunmap_local(arg);
+ folio_put(folio);
+}
+
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct folio *folio;
+ char *content;
+ ssize_t ret;
+
+ if (!dentry) {
+ /* RCU pathwalk. */
+ if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
+ return ERR_PTR(-ECHILD);
+ goto good;
+ }
+
+ if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+ goto fetch;
+
+ ret = afs_validate(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+ test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+ goto good;
+
+fetch:
+ ret = afs_read_single(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+
+good:
+ folio = folioq_folio(vnode->directory, 0);
+ folio_get(folio);
+ content = kmap_local_folio(folio, 0);
+ set_delayed_call(callback, afs_put_link, content);
+ return content;
+}
+
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+ DEFINE_DELAYED_CALL(done);
+ const char *content;
+ int len;
+
+ content = afs_get_link(dentry, d_inode(dentry), &done);
+ if (IS_ERR(content)) {
+ do_delayed_call(&done);
+ return PTR_ERR(content);
+ }
+
+ len = umin(strlen(content), buflen);
+ if (copy_to_user(buffer, content, len))
+ len = -EFAULT;
+ do_delayed_call(&done);
+ return len;
+}
+
static const struct inode_operations afs_symlink_inode_operations = {
- .get_link = page_get_link,
+ .get_link = afs_get_link,
+ .readlink = afs_readlink,
};
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
@@ -58,7 +144,7 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
*/
static void afs_set_netfs_context(struct afs_vnode *vnode)
{
- netfs_inode_init(&vnode->netfs, &afs_req_ops);
+ netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
}
/*
@@ -85,14 +171,13 @@ static int afs_inode_init_from_status(struct afs_operation *op,
write_seqlock(&vnode->cb_lock);
- vnode->cb_v_break = op->cb_v_break;
- vnode->cb_s_break = op->cb_s_break;
+ vnode->cb_v_check = op->cb_v_break;
vnode->status = *status;
t = status->mtime_client;
- inode->i_ctime = t;
- inode->i_mtime = t;
- inode->i_atime = t;
+ inode_set_ctime_to_ts(inode, t);
+ inode_set_mtime_to_ts(inode, t);
+ inode_set_atime_to_ts(inode, t);
inode->i_flags |= S_NOATIME;
inode->i_uid = make_kuid(&init_user_ns, status->owner);
inode->i_gid = make_kgid(&init_user_ns, status->group);
@@ -111,7 +196,9 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
- mapping_set_large_folios(inode->i_mapping);
+ __set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
+ /* Assume locally cached directory data will be valid. */
+ __set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
break;
case AFS_FTYPE_SYMLINK:
/* Symlinks with a mode of 0644 are actually mountpoints. */
@@ -123,13 +210,13 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_mode = S_IFDIR | 0555;
inode->i_op = &afs_mntpt_inode_operations;
inode->i_fop = &afs_mntpt_file_operations;
- inode->i_mapping->a_ops = &afs_symlink_aops;
} else {
inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
- inode->i_mapping->a_ops = &afs_symlink_aops;
}
+ inode->i_mapping->a_ops = &afs_dir_aops;
inode_nohighmem(inode);
+ mapping_set_release_always(inode->i_mapping);
break;
default:
dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
@@ -141,16 +228,17 @@ static int afs_inode_init_from_status(struct afs_operation *op,
afs_set_netfs_context(vnode);
vnode->invalid_before = status->data_version;
+ trace_afs_set_dv(vnode, status->data_version);
inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
if (!vp->scb.have_cb) {
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
- vnode->cb_expires_at = ktime_get_real_seconds();
+ afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink);
} else {
- vnode->cb_expires_at = vp->scb.callback.expires_at;
vnode->cb_server = op->server;
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ afs_set_cb_promise(vnode, vp->scb.callback.expires_at,
+ afs_cb_promise_set_new_inode);
}
write_sequnlock(&vnode->cb_lock);
@@ -168,6 +256,7 @@ static void afs_apply_status(struct afs_operation *op,
struct inode *inode = &vnode->netfs.inode;
struct timespec64 t;
umode_t mode;
+ bool unexpected_jump = false;
bool data_changed = false;
bool change_size = vp->set_size;
@@ -204,17 +293,23 @@ static void afs_apply_status(struct afs_operation *op,
}
t = status->mtime_client;
- inode->i_mtime = t;
+ inode_set_mtime_to_ts(inode, t);
if (vp->update_ctime)
- inode->i_ctime = op->ctime;
+ inode_set_ctime_to_ts(inode, op->ctime);
- if (vnode->status.data_version != status->data_version)
+ if (vnode->status.data_version != status->data_version) {
+ trace_afs_set_dv(vnode, status->data_version);
data_changed = true;
+ }
vnode->status = *status;
if (vp->dv_before + vp->dv_delta != status->data_version) {
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta,
+ status->data_version);
+
+ if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
+ atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
vnode->fid.vid, vnode->fid.vnode,
(unsigned long long)vp->dv_before + vp->dv_delta,
@@ -223,13 +318,13 @@ static void afs_apply_status(struct afs_operation *op,
op->debug_id);
vnode->invalid_before = status->data_version;
- if (vnode->status.type == AFS_FTYPE_DIR) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- afs_stat_v(vnode, n_inval);
- } else {
+ if (vnode->status.type == AFS_FTYPE_DIR)
+ afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch);
+ else
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
- }
change_size = true;
+ data_changed = true;
+ unexpected_jump = true;
} else if (vnode->status.type == AFS_FTYPE_DIR) {
/* Expected directory change is handled elsewhere so
* that we can locally edit the directory and save on a
@@ -249,11 +344,15 @@ static void afs_apply_status(struct afs_operation *op,
* what's on the server.
*/
vnode->netfs.remote_i_size = status->size;
- if (change_size) {
+ if (change_size || status->size > i_size_read(inode)) {
afs_set_i_size(vnode, status->size);
- inode->i_ctime = t;
- inode->i_atime = t;
+ if (unexpected_jump)
+ vnode->netfs.zero_point = status->size;
+ inode_set_ctime_to_ts(inode, t);
+ inode_set_atime_to_ts(inode, t);
}
+ if (op->ops == &afs_fetch_data_operation)
+ op->fetch.subreq->rreq->i_size = status->size;
}
}
@@ -267,9 +366,9 @@ static void afs_apply_callback(struct afs_operation *op,
struct afs_vnode *vnode = vp->vnode;
if (!afs_cb_is_broken(vp->cb_break_before, vnode)) {
- vnode->cb_expires_at = cb->expires_at;
- vnode->cb_server = op->server;
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ if (op->volume->type == AFSVL_RWVOL)
+ vnode->cb_server = op->server;
+ afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb);
}
}
@@ -328,9 +427,9 @@ static void afs_fetch_status_success(struct afs_operation *op)
struct afs_vnode *vnode = vp->vnode;
int ret;
- if (vnode->netfs.inode.i_state & I_NEW) {
+ if (inode_state_read_once(&vnode->netfs.inode) & I_NEW) {
ret = afs_inode_init_from_status(op, vp, vnode);
- op->error = ret;
+ afs_op_set_error(op, ret);
if (ret == 0)
afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
} else {
@@ -431,7 +530,9 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
} __packed key;
struct afs_vnode_cache_aux aux;
- if (vnode->status.type != AFS_FTYPE_FILE) {
+ if (vnode->status.type != AFS_FTYPE_FILE &&
+ vnode->status.type != AFS_FTYPE_DIR &&
+ vnode->status.type != AFS_FTYPE_SYMLINK) {
vnode->netfs.cache = NULL;
return;
}
@@ -449,7 +550,7 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
0 : FSCACHE_ADV_SINGLE_CHUNK,
&key, sizeof(key),
&aux, sizeof(aux),
- vnode->status.size));
+ i_size_read(&vnode->netfs.inode)));
#endif
}
@@ -478,7 +579,7 @@ struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp)
inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
/* deal with an existing inode */
- if (!(inode->i_state & I_NEW)) {
+ if (!(inode_state_read_once(inode) & I_NEW)) {
_leave(" = %p", inode);
return inode;
}
@@ -508,7 +609,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque)
struct afs_vnode *vnode = AFS_FS_I(inode);
vnode->volume = as->volume;
- vnode->fid.vid = as->volume->vid,
+ vnode->fid.vid = as->volume->vid;
vnode->fid.vnode = 1;
vnode->fid.unique = 1;
inode->i_ino = 1;
@@ -538,10 +639,10 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key)
_debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid);
- BUG_ON(!(inode->i_state & I_NEW));
+ BUG_ON(!(inode_state_read_once(inode) & I_NEW));
vnode = AFS_FS_I(inode);
- vnode->cb_v_break = as->volume->cb_v_break,
+ vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
afs_set_netfs_context(vnode);
op = afs_alloc_operation(key, as->volume);
@@ -572,184 +673,21 @@ error:
}
/*
- * mark the data attached to an inode as obsolete due to a write on the server
- * - might also want to ditch all the outstanding writes and dirty pages
- */
-static void afs_zap_data(struct afs_vnode *vnode)
-{
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
- afs_invalidate_cache(vnode, 0);
-
- /* nuke all the non-dirty pages that aren't locked, mapped or being
- * written back in a regular file and completely discard the pages in a
- * directory or symlink */
- if (S_ISREG(vnode->netfs.inode.i_mode))
- invalidate_remote_inode(&vnode->netfs.inode);
- else
- invalidate_inode_pages2(vnode->netfs.inode.i_mapping);
-}
-
-/*
- * Check to see if we have a server currently serving this volume and that it
- * hasn't been reinitialised or dropped from the list.
- */
-static bool afs_check_server_good(struct afs_vnode *vnode)
-{
- struct afs_server_list *slist;
- struct afs_server *server;
- bool good;
- int i;
-
- if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break))
- return true;
-
- rcu_read_lock();
-
- slist = rcu_dereference(vnode->volume->servers);
- for (i = 0; i < slist->nr_servers; i++) {
- server = slist->servers[i].server;
- if (server == vnode->cb_server) {
- good = (vnode->cb_s_break == server->cb_s_break);
- rcu_read_unlock();
- return good;
- }
- }
-
- rcu_read_unlock();
- return false;
-}
-
-/*
- * Check the validity of a vnode/inode.
- */
-bool afs_check_validity(struct afs_vnode *vnode)
-{
- enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
- time64_t now = ktime_get_real_seconds();
- unsigned int cb_break;
- int seq = 0;
-
- do {
- read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- cb_break = vnode->cb_break;
-
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_v_break != vnode->volume->cb_v_break)
- need_clear = afs_cb_break_for_v_break;
- else if (!afs_check_server_good(vnode))
- need_clear = afs_cb_break_for_s_reinit;
- else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
- need_clear = afs_cb_break_for_zap;
- else if (vnode->cb_expires_at - 10 <= now)
- need_clear = afs_cb_break_for_lapsed;
- } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- ;
- } else {
- need_clear = afs_cb_break_no_promise;
- }
-
- } while (need_seqretry(&vnode->cb_lock, seq));
-
- done_seqretry(&vnode->cb_lock, seq);
-
- if (need_clear == afs_cb_break_no_break)
- return true;
-
- write_seqlock(&vnode->cb_lock);
- if (need_clear == afs_cb_break_no_promise)
- vnode->cb_v_break = vnode->volume->cb_v_break;
- else if (cb_break == vnode->cb_break)
- __afs_break_callback(vnode, need_clear);
- else
- trace_afs_cb_miss(&vnode->fid, need_clear);
- write_sequnlock(&vnode->cb_lock);
- return false;
-}
-
-/*
- * validate a vnode/inode
- * - there are several things we need to check
- * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
- * symlink)
- * - parent dir metadata changed (security changes)
- * - dentry data changed (write, truncate)
- * - dentry metadata changed (security changes)
- */
-int afs_validate(struct afs_vnode *vnode, struct key *key)
-{
- int ret;
-
- _enter("{v={%llx:%llu} fl=%lx},%x",
- vnode->fid.vid, vnode->fid.vnode, vnode->flags,
- key_serial(key));
-
- if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) {
- if (vnode->netfs.inode.i_nlink)
- clear_nlink(&vnode->netfs.inode);
- goto valid;
- }
-
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
- afs_check_validity(vnode))
- goto valid;
-
- down_write(&vnode->validate_lock);
-
- /* if the promise has expired, we need to check the server again to get
- * a new promise - note that if the (parent) directory's metadata was
- * changed then the security may be different and we may no longer have
- * access */
- if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- _debug("not promised");
- ret = afs_fetch_status(vnode, key, false, NULL);
- if (ret < 0) {
- if (ret == -ENOENT) {
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- ret = -ESTALE;
- }
- goto error_unlock;
- }
- _debug("new promise [fl=%lx]", vnode->flags);
- }
-
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- _debug("file already deleted");
- ret = -ESTALE;
- goto error_unlock;
- }
-
- /* if the vnode's data version number changed then its contents are
- * different */
- if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
- afs_zap_data(vnode);
- up_write(&vnode->validate_lock);
-valid:
- _leave(" = 0");
- return 0;
-
-error_unlock:
- up_write(&vnode->validate_lock);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* read the attributes of an inode
*/
-int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct afs_vnode *vnode = AFS_FS_I(inode);
struct key *key;
- int ret, seq = 0;
+ int ret, seq;
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
if (vnode->volume &&
!(query_flags & AT_STATX_DONT_SYNC) &&
- !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ atomic64_read(&vnode->cb_expires_at) == AFS_NO_CB_PROMISE) {
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key))
return PTR_ERR(key);
@@ -760,14 +698,20 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
}
do {
- read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- generic_fillattr(&init_user_ns, inode, stat);
+ seq = read_seqbegin(&vnode->cb_lock);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
stat->nlink > 0)
stat->nlink -= 1;
- } while (need_seqretry(&vnode->cb_lock, seq));
- done_seqretry(&vnode->cb_lock, seq);
+ /* Lie about the size of directories. We maintain a locally
+ * edited copy and may make different allocation decisions on
+ * it, but we need to give userspace the server's size.
+ */
+ if (S_ISDIR(inode->i_mode))
+ stat->size = vnode->netfs.remote_i_size;
+ } while (read_seqretry(&vnode->cb_lock, seq));
+
return 0;
}
@@ -779,9 +723,9 @@ int afs_drop_inode(struct inode *inode)
_enter("");
if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags))
- return generic_delete_inode(inode);
+ return inode_just_drop(inode);
else
- return generic_drop_inode(inode);
+ return inode_generic_drop(inode);
}
/*
@@ -790,6 +734,7 @@ int afs_drop_inode(struct inode *inode)
void afs_evict_inode(struct inode *inode)
{
struct afs_vnode_cache_aux aux;
+ struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
struct afs_vnode *vnode = AFS_FS_I(inode);
_enter("{%llx:%llu.%d}",
@@ -801,10 +746,25 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+ if ((S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) &&
+ (inode_state_read_once(inode) & I_DIRTY) &&
+ !sbi->dyn_root) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .for_sync = true,
+ .range_end = LLONG_MAX,
+ };
+
+ afs_single_writepages(inode->i_mapping, &wbc);
+ }
+
+ netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
+ netfs_free_folioq_buffer(vnode->directory);
afs_set_cache_aux(vnode, &aux);
- fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux);
+ netfs_clear_inode_writeback(inode, &aux);
clear_inode(inode);
while (!list_empty(&vnode->wb_keys)) {
@@ -846,17 +806,22 @@ static void afs_setattr_success(struct afs_operation *op)
static void afs_setattr_edit_file(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
- struct inode *inode = &vp->vnode->netfs.inode;
+ struct afs_vnode *vnode = vp->vnode;
+ struct inode *inode = &vnode->netfs.inode;
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
loff_t size = op->setattr.attr->ia_size;
- loff_t i_size = op->setattr.old_i_size;
+ loff_t old = op->setattr.old_i_size;
- if (size < i_size)
+ /* Note: inode->i_size was updated by afs_apply_status() inside
+ * the I/O and callback locks.
+ */
+
+ if (size != old) {
truncate_pagecache(inode, size);
- if (size != i_size)
- fscache_resize_cookie(afs_vnode_cache(vp->vnode),
- vp->scb.status.size);
+ netfs_resize_file(&vnode->netfs, size, true);
+ fscache_resize_cookie(afs_vnode_cache(vnode), size);
+ }
}
}
@@ -870,7 +835,7 @@ static const struct afs_operation_ops afs_setattr_operation = {
/*
* set the attributes of an inode
*/
-int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
const unsigned int supported =
@@ -924,11 +889,11 @@ int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
*/
if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
attr->ia_size < i_size &&
- attr->ia_size > vnode->status.size) {
- truncate_pagecache(inode, attr->ia_size);
+ attr->ia_size > vnode->netfs.remote_i_size) {
+ truncate_setsize(inode, attr->ia_size);
+ netfs_resize_file(&vnode->netfs, size, false);
fscache_resize_cookie(afs_vnode_cache(vnode),
attr->ia_size);
- i_size_write(inode, attr->ia_size);
ret = 0;
goto out_unlock;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index fd8567b98e2b..009064b8d661 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/pagemap.h>
#include <linux/rxrpc.h>
#include <linux/key.h>
@@ -19,6 +20,7 @@
#include <linux/uuid.h>
#include <linux/mm_types.h>
#include <linux/dns_resolver.h>
+#include <crypto/krb5.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -32,6 +34,7 @@
struct pagevec;
struct afs_call;
struct afs_vnode;
+struct afs_server_probe;
/*
* Partial file-locking emulation mode. (The problem being that AFS3 only
@@ -72,21 +75,51 @@ enum afs_call_state {
};
/*
+ * Address preferences.
+ */
+struct afs_addr_preference {
+ union {
+ struct in_addr ipv4_addr; /* AF_INET address to compare against */
+ struct in6_addr ipv6_addr; /* AF_INET6 address to compare against */
+ };
+ sa_family_t family; /* Which address to use */
+ u16 prio; /* Priority */
+ u8 subnet_mask; /* How many bits to compare */
+};
+
+struct afs_addr_preference_list {
+ struct rcu_head rcu;
+ u16 version; /* Incremented when prefs list changes */
+ u8 ipv6_off; /* Offset of IPv6 addresses */
+ u8 nr; /* Number of addresses in total */
+ u8 max_prefs; /* Number of prefs allocated */
+ struct afs_addr_preference prefs[] __counted_by(max_prefs);
+};
+
+struct afs_address {
+ struct rxrpc_peer *peer;
+ short last_error; /* Last error from this address */
+ u16 prio; /* Address priority */
+};
+
+/*
* List of server addresses.
*/
struct afs_addr_list {
struct rcu_head rcu;
refcount_t usage;
u32 version; /* Version */
+ unsigned int debug_id;
+ unsigned int addr_pref_version; /* Version of address preference list */
unsigned char max_addrs;
unsigned char nr_addrs;
unsigned char preferred; /* Preferred address */
unsigned char nr_ipv4; /* Number of IPv4 addresses */
enum dns_record_source source:8;
enum dns_lookup_status status:8;
- unsigned long failed; /* Mask of addrs that failed locally/ICMP */
+ unsigned long probe_failed; /* Mask of addrs that failed locally/ICMP */
unsigned long responded; /* Mask of addrs that responded */
- struct sockaddr_rxrpc addrs[];
+ struct afs_address addrs[] __counted_by(max_addrs);
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};
@@ -95,11 +128,12 @@ struct afs_addr_list {
*/
struct afs_call {
const struct afs_call_type *type; /* type of call */
- struct afs_addr_list *alist; /* Address is alist[addr_ix] */
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
+ struct work_struct free_work; /* Deferred free processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */
+ struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
struct afs_server *server; /* The fileserver record if fs op (pins ref) */
@@ -115,11 +149,14 @@ struct afs_call {
};
void *buffer; /* reply receive buffer */
union {
- long ret0; /* Value to reply with instead of 0 */
+ struct afs_endpoint_state *probe;
+ struct afs_addr_list *vl_probe;
struct afs_addr_list *ret_alist;
struct afs_vldb_entry *ret_vldb;
char *ret_str;
};
+ struct afs_fid fid; /* Primary vnode ID (or all zeroes) */
+ unsigned char probe_index; /* Address in ->probe_alist */
struct afs_operation *op;
unsigned int server_index;
refcount_t ref;
@@ -127,20 +164,23 @@ struct afs_call {
spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
- unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */
+ unsigned long long remaining; /* How much is left to receive */
+ unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned count2; /* count used in unmarshalling */
unsigned char unmarshall; /* unmarshalling phase */
- unsigned char addr_ix; /* Address in ->alist */
bool drop_ref; /* T if need to drop ref for incoming call */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
+ bool responded; /* Got a response from the call (may be abort) */
+ u8 security_ix; /* Security class */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
+ u32 enctype; /* Security encoding type */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
union { /* place to extract temporary data */
@@ -165,11 +205,17 @@ struct afs_call_type {
/* clean up a call */
void (*destructor)(struct afs_call *call);
+ /* Async receive processing function */
+ void (*async_rx)(struct work_struct *work);
+
/* Work function */
void (*work)(struct work_struct *work);
/* Call done function (gets called immediately on success or failure) */
void (*done)(struct afs_call *call);
+
+ /* Handle a call being immediately cancelled. */
+ void (*immediate_cancel)(struct afs_call *call);
};
/*
@@ -197,28 +243,6 @@ static inline struct key *afs_file_key(struct file *file)
}
/*
- * Record of an outstanding read operation on a vnode.
- */
-struct afs_read {
- loff_t pos; /* Where to start reading */
- loff_t len; /* How much we're asking for */
- loff_t actual_len; /* How much we're actually getting */
- loff_t file_size; /* File size returned by server */
- struct key *key; /* The key to use to reissue the read */
- struct afs_vnode *vnode; /* The file being read into. */
- struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */
- afs_dataversion_t data_version; /* Version number returned by server */
- refcount_t usage;
- unsigned int call_debug_id;
- unsigned int nr_pages;
- int error;
- void (*done)(struct afs_read *);
- void (*cleanup)(struct afs_read *);
- struct iov_iter *iter; /* Iterator representing the buffer */
- struct iov_iter def_iter; /* Default iterator */
-};
-
-/*
* AFS superblock private data
* - there's one superblock per volume
*/
@@ -260,15 +284,15 @@ struct afs_net {
struct socket *socket;
struct afs_call *spare_incoming_call;
struct work_struct charge_preallocation_work;
+ struct work_struct rx_oob_work;
struct mutex socket_mutex;
atomic_t nr_outstanding_calls;
atomic_t nr_superblocks;
/* Cell database */
struct rb_root cells;
- struct afs_cell *ws_cell;
- struct work_struct cells_manager;
- struct timer_list cells_timer;
+ struct idr cells_dyn_ino; /* cell->dynroot_ino mapping */
+ struct afs_cell __rcu *ws_cell;
atomic_t cells_outstanding;
struct rw_semaphore cells_lock;
struct mutex cells_alias_lock;
@@ -280,19 +304,12 @@ struct afs_net {
* cell, but in practice, people create aliases and subsets and there's
* no easy way to distinguish them.
*/
- seqlock_t fs_lock; /* For fs_servers, fs_probe_*, fs_proc */
- struct rb_root fs_servers; /* afs_server (by server UUID or address) */
+ seqlock_t fs_lock; /* For fs_probe_*, fs_proc */
struct list_head fs_probe_fast; /* List of afs_server to probe at 30s intervals */
struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */
struct hlist_head fs_proc; /* procfs servers list */
- struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */
- struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */
- seqlock_t fs_addr_lock; /* For fs_addresses[46] */
-
- struct work_struct fs_manager;
- struct timer_list fs_timer;
-
+ struct key *fs_cm_token_key; /* Key for creating CM tokens */
struct work_struct fs_prober;
struct timer_list fs_probe_timer;
atomic_t servers_outstanding;
@@ -305,6 +322,8 @@ struct afs_net {
struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */
struct afs_sysnames *sysnames;
rwlock_t sysnames_lock;
+ struct afs_addr_preference_list __rcu *address_prefs;
+ u16 address_pref_version;
/* Statistics counters */
atomic_t n_lookup; /* Number of lookups done */
@@ -323,13 +342,11 @@ struct afs_net {
extern const char afs_init_sysname[];
enum afs_cell_state {
- AFS_CELL_UNSET,
- AFS_CELL_ACTIVATING,
+ AFS_CELL_SETTING_UP,
+ AFS_CELL_UNLOOKED,
AFS_CELL_ACTIVE,
- AFS_CELL_DEACTIVATING,
- AFS_CELL_INACTIVE,
- AFS_CELL_FAILED,
- AFS_CELL_REMOVED,
+ AFS_CELL_REMOVING,
+ AFS_CELL_DEAD,
};
/*
@@ -360,7 +377,9 @@ struct afs_cell {
struct afs_cell *alias_of; /* The cell this is an alias of */
struct afs_volume *root_volume; /* The root.cell volume if there is one */
struct key *anonymous_key; /* anonymous user key for this cell */
+ struct work_struct destroyer; /* Destroyer for cell */
struct work_struct manager; /* Manager for init/deinit/dns */
+ struct timer_list management_timer; /* General management timer */
struct hlist_node proc_link; /* /proc cell list link */
time64_t dns_expiry; /* Time AFSDB/SRV record expires */
time64_t last_inactive; /* Time of last drop of usage count */
@@ -376,18 +395,17 @@ struct afs_cell {
enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
unsigned int dns_lookup_count; /* Counter of DNS lookups */
unsigned int debug_id;
+ unsigned int dynroot_ino; /* Inode numbers for dynroot (a pair) */
/* The volumes belonging to this cell */
+ struct rw_semaphore vs_lock; /* Lock for server->volumes */
struct rb_root volumes; /* Tree of volumes on this server */
struct hlist_head proc_volumes; /* procfs volume list */
seqlock_t volume_lock; /* For volumes */
/* Active fileserver interaction state. */
struct rb_root fs_servers; /* afs_server (by server UUID) */
- seqlock_t fs_lock; /* For fs_servers */
- struct rw_semaphore fs_open_mmaps_lock;
- struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */
- atomic_t fs_s_break; /* Counter of CB.InitCallBackState messages */
+ struct rw_semaphore fs_lock; /* For fs_servers */
/* VL server list. */
rwlock_t vl_servers_lock; /* Lock on vl_servers */
@@ -395,6 +413,7 @@ struct afs_cell {
u8 name_len; /* Length of name */
char *name; /* Cell name, case-flattened and NUL-padded */
+ char *key_desc; /* Authentication key description */
};
/*
@@ -411,13 +430,14 @@ struct afs_vlserver {
rwlock_t lock; /* Lock on addresses */
refcount_t ref;
unsigned int rtt; /* Server's current RTT in uS */
+ unsigned int debug_id;
/* Probe state */
wait_queue_head_t probe_wq;
atomic_t probe_outstanding;
spinlock_t probe_lock;
struct {
- unsigned int rtt; /* RTT in uS */
+ unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */
u32 abort_code;
short error;
unsigned short flags;
@@ -427,6 +447,7 @@ struct afs_vlserver {
#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */
} probe;
+ u16 service_id; /* Service ID we're using */
u16 port;
u16 name_len; /* Length of name */
char name[]; /* Server name, case-flattened */
@@ -476,6 +497,7 @@ struct afs_vldb_entry {
#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
+ u8 vlsf_flags[AFS_NMAXNSERVERS];
short error;
u8 nr_servers; /* Number of server records */
u8 name_len;
@@ -483,6 +505,32 @@ struct afs_vldb_entry {
};
/*
+ * Fileserver endpoint state. The records the addresses of a fileserver's
+ * endpoints and the state and result of a round of probing on them. This
+ * allows the rotation algorithm to access those results without them being
+ * erased by a subsequent round of probing.
+ */
+struct afs_endpoint_state {
+ struct rcu_head rcu;
+ struct afs_addr_list *addresses; /* The addresses being probed */
+ unsigned long responsive_set; /* Bitset of responsive endpoints */
+ unsigned long failed_set; /* Bitset of endpoints we failed to probe */
+ refcount_t ref;
+ unsigned int server_id; /* Debug ID of server */
+ unsigned int probe_seq; /* Probe sequence (from server::probe_counter) */
+ atomic_t nr_probing; /* Number of outstanding probes */
+ unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */
+ s32 abort_code;
+ short error;
+ unsigned long flags;
+#define AFS_ESTATE_RESPONDED 0 /* Set if the server responded */
+#define AFS_ESTATE_SUPERSEDED 1 /* Set if this record has been superseded */
+#define AFS_ESTATE_IS_YFS 2 /* Set if probe upgraded to YFS */
+#define AFS_ESTATE_NOT_YFS 3 /* Set if probe didn't upgrade to YFS */
+#define AFS_ESTATE_LOCAL_FAILURE 4 /* Set if there was a local failure (eg. ENOMEM) */
+};
+
+/*
* Record of fileserver with which we're actively communicating.
*/
struct afs_server {
@@ -492,70 +540,76 @@ struct afs_server {
struct afs_uuid _uuid;
};
- struct afs_addr_list __rcu *addresses;
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
- struct rb_node uuid_rb; /* Link in net->fs_servers */
- struct afs_server __rcu *uuid_next; /* Next server with same UUID */
- struct afs_server *uuid_prev; /* Previous server with same UUID */
- struct list_head probe_link; /* Link in net->fs_probe_list */
- struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
- struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
+ struct rb_node uuid_rb; /* Link in cell->fs_servers */
+ struct list_head probe_link; /* Link in net->fs_probe_* */
struct hlist_node proc_link; /* Link in net->fs_proc */
- struct work_struct initcb_work; /* Work for CB.InitCallBackState* */
- struct afs_server *gc_next; /* Next server in manager's list */
+ struct list_head volumes; /* RCU list of afs_server_entry objects */
+ struct work_struct destroyer; /* Work item to try and destroy a server */
+ struct timer_list timer; /* Management timer */
+ struct mutex cm_token_lock; /* Lock governing creation of appdata */
+ struct krb5_buffer cm_rxgk_appdata; /* Appdata to be included in RESPONSE packet */
time64_t unuse_time; /* Time at which last unused */
unsigned long flags;
#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */
#define AFS_SERVER_FL_UPDATING 1
#define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */
-#define AFS_SERVER_FL_NOT_READY 4 /* The record is not ready for use */
-#define AFS_SERVER_FL_NOT_FOUND 5 /* VL server says no such server */
-#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */
+#define AFS_SERVER_FL_UNCREATED 3 /* The record needs creating */
+#define AFS_SERVER_FL_CREATING 4 /* The record is being created */
+#define AFS_SERVER_FL_EXPIRED 5 /* The record has expired */
+#define AFS_SERVER_FL_NOT_FOUND 6 /* VL server says no such server */
+#define AFS_SERVER_FL_VL_FAIL 7 /* Failed to access VL server */
#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
#define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */
#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */
#define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */
#define AFS_SERVER_FL_HAS_FS64 19 /* Fileserver supports FS.{Fetch,Store}Data64 */
+#define AFS_SERVER_FL_NO_RENAME2 20 /* YFS Fileserver doesn't support enhanced rename */
refcount_t ref; /* Object refcount */
atomic_t active; /* Active user count */
u32 addr_version; /* Address list version */
+ u16 service_id; /* Service ID we're using. */
+ short create_error; /* Creation error */
unsigned int rtt; /* Server's current RTT in uS */
unsigned int debug_id; /* Debugging ID for traces */
/* file service access */
rwlock_t fs_lock; /* access lock */
- /* callback promise management */
- unsigned cb_s_break; /* Break-everything counter. */
-
/* Probe state */
+ struct afs_endpoint_state __rcu *endpoint_state; /* Latest endpoint/probe state */
unsigned long probed_at; /* Time last probe was dispatched (jiffies) */
wait_queue_head_t probe_wq;
- atomic_t probe_outstanding;
+ unsigned int probe_counter; /* Number of probes issued */
spinlock_t probe_lock;
- struct {
- unsigned int rtt; /* RTT in uS */
- u32 abort_code;
- short error;
- bool responded:1;
- bool is_yfs:1;
- bool not_yfs:1;
- bool local_failure:1;
- } probe;
};
+enum afs_ro_replicating {
+ AFS_RO_NOT_REPLICATING, /* Not doing replication */
+ AFS_RO_REPLICATING_USE_OLD, /* Replicating; use old version */
+ AFS_RO_REPLICATING_USE_NEW, /* Replicating; switch to new version */
+} __mode(byte);
+
/*
* Replaceable volume server list.
*/
struct afs_server_entry {
struct afs_server *server;
+ struct afs_volume *volume;
+ struct list_head slink; /* Link in server->volumes */
+ time64_t cb_expires_at; /* Time at which volume-level callback expires */
+ unsigned long flags;
+#define AFS_SE_EXCLUDED 0 /* Set if server is to be excluded in rotation */
+#define AFS_SE_VOLUME_OFFLINE 1 /* Set if volume offline notice given */
+#define AFS_SE_VOLUME_BUSY 2 /* Set if volume busy notice given */
};
struct afs_server_list {
- afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */
+ struct rcu_head rcu;
refcount_t usage;
+ bool attached; /* T if attached to servers */
+ enum afs_ro_replicating ro_replicating; /* RW->RO update (probably) in progress */
unsigned char nr_servers;
- unsigned char preferred; /* Preferred server */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
rwlock_t lock;
@@ -566,24 +620,24 @@ struct afs_server_list {
* Live AFS volume management.
*/
struct afs_volume {
- union {
- struct rcu_head rcu;
- afs_volid_t vid; /* volume ID */
- };
+ struct rcu_head rcu;
+ afs_volid_t vid; /* The volume ID of this volume */
+ afs_volid_t vids[AFS_MAXTYPES]; /* All associated volume IDs */
refcount_t ref;
+ unsigned int debug_id; /* Debugging ID for traces */
time64_t update_at; /* Time at which to next update */
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
struct rb_node cell_node; /* Link in cell->volumes */
struct hlist_node proc_link; /* Link in cell->proc_volumes */
struct super_block __rcu *sb; /* Superblock on which inodes reside */
+ struct work_struct destructor; /* Deferred destructor */
unsigned long flags;
#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */
#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */
#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */
#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */
-#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
-#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
-#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_MAYBE_NO_IBULK 4 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE 5 /* - Set if volume removed from cell->volumes */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_volume *cache; /* Caching cookie */
#endif
@@ -591,8 +645,21 @@ struct afs_volume {
rwlock_t servers_lock; /* Lock for ->servers */
unsigned int servers_seq; /* Incremented each time ->servers changes */
- unsigned cb_v_break; /* Break-everything counter. */
+ /* RO release tracking */
+ struct mutex volsync_lock; /* Time/state evaluation lock */
+ time64_t creation_time; /* Volume creation time (or TIME64_MIN) */
+ time64_t update_time; /* Volume update time (or TIME64_MIN) */
+
+ /* Callback management */
+ struct mutex cb_check_lock; /* Lock to control race to check after v_break */
+ time64_t cb_expires_at; /* Earliest volume callback expiry time */
+ atomic_t cb_ro_snapshot; /* RO volume update-from-snapshot counter */
+ atomic_t cb_v_break; /* Volume-break event counter. */
+ atomic_t cb_v_check; /* Volume-break has-been-checked counter. */
+ atomic_t cb_scrub; /* Scrub-all-data event counter. */
rwlock_t cb_v_break_lock;
+ struct rw_semaphore open_mmaps_lock;
+ struct list_head open_mmaps; /* List of vnodes that are mmapped */
afs_voltype_t type; /* type of volume */
char type_force; /* force volume type (suppress R/O -> R/W) */
@@ -624,25 +691,26 @@ struct afs_vnode {
struct afs_file_status status; /* AFS status info for this file */
afs_dataversion_t invalid_before; /* Child dentries are invalid before this */
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
- struct mutex io_lock; /* Lock for serialising I/O on this mutex */
+ struct list_head io_lock_waiters; /* Threads waiting for the I/O lock */
struct rw_semaphore validate_lock; /* lock for validating this vnode */
struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */
struct key *silly_key; /* Silly rename key */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
-#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
+#define AFS_VNODE_IO_LOCK 0 /* Set if the I/O serialisation lock is held */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
-#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
+#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
+ struct folio_queue *directory; /* Directory contents */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
@@ -651,19 +719,21 @@ struct afs_vnode {
ktime_t locked_at; /* Time at which lock obtained */
enum afs_lock_state lock_state : 8;
afs_lock_type_t lock_type : 8;
+ unsigned int directory_size; /* Amount of space in ->directory */
/* outstanding callback notification on this file */
struct work_struct cb_work; /* Work for mmap'd files */
struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */
void *cb_server; /* Server with callback/filelock */
atomic_t cb_nr_mmap; /* Number of mmaps */
- unsigned int cb_fs_s_break; /* Mass server break counter (cell->fs_s_break) */
- unsigned int cb_s_break; /* Mass break counter on ->server */
- unsigned int cb_v_break; /* Mass break counter on ->volume */
+ unsigned int cb_ro_snapshot; /* RO volume release counter on ->volume */
+ unsigned int cb_scrub; /* Scrub counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
+ unsigned int cb_v_check; /* Break check counter on ->volume */
seqlock_t cb_lock; /* Lock for ->cb_server, ->status, ->cb_*break */
- time64_t cb_expires_at; /* time at which callback expires */
+ atomic64_t cb_expires_at; /* time at which callback expires */
+#define AFS_NO_CB_PROMISE TIME64_MIN
};
static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
@@ -680,6 +750,8 @@ static inline void afs_vnode_set_cache(struct afs_vnode *vnode,
{
#ifdef CONFIG_AFS_FSCACHE
vnode->netfs.cache = cookie;
+ if (cookie)
+ mapping_set_release_always(vnode->netfs.inode.i_mapping);
#endif
}
@@ -702,47 +774,56 @@ struct afs_permits {
refcount_t usage;
unsigned short nr_permits; /* Number of records */
bool invalidated; /* Invalidated due to key change */
- struct afs_permit permits[]; /* List of permits sorted by key pointer */
+ struct afs_permit permits[] __counted_by(nr_permits); /* List of permits sorted by key pointer */
};
/*
* Error prioritisation and accumulation.
*/
struct afs_error {
- short error; /* Accumulated error */
+ s32 abort_code; /* Cumulative abort code */
+ short error; /* Cumulative error */
bool responded; /* T if server responded */
-};
-
-/*
- * Cursor for iterating over a server's address list.
- */
-struct afs_addr_cursor {
- struct afs_addr_list *alist; /* Current address list (pins ref) */
- unsigned long tried; /* Tried addresses */
- signed char index; /* Current address */
- bool responded; /* T if the current address responded */
- unsigned short nr_iterations; /* Number of address iterations */
- short error;
- u32 abort_code;
+ bool aborted; /* T if ->error is from an abort */
};
/*
* Cursor for iterating over a set of volume location servers.
*/
struct afs_vl_cursor {
- struct afs_addr_cursor ac;
struct afs_cell *cell; /* The cell we're querying */
struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
struct afs_vlserver *server; /* Server on which this resides */
+ struct afs_addr_list *alist; /* Current address list (pins ref) */
struct key *key; /* Key for the server */
- unsigned long untried; /* Bitmask of untried servers */
- short index; /* Current server */
- short error;
+ unsigned long untried_servers; /* Bitmask of untried servers */
+ unsigned long addr_tried; /* Tried addresses */
+ struct afs_error cumul_error; /* Cumulative error */
+ unsigned int debug_id;
+ s32 call_abort_code;
+ short call_error; /* Error from single call */
+ short server_index; /* Current server */
+ signed char addr_index; /* Current address */
unsigned short flags;
#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */
#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */
- unsigned short nr_iterations; /* Number of server iterations */
+ short nr_iterations; /* Number of server iterations */
+ bool call_responded; /* T if the current address responded */
+};
+
+/*
+ * Fileserver state tracking for an operation. An array of these is kept,
+ * indexed by server index.
+ */
+struct afs_server_state {
+ /* Tracking of fileserver probe state. Other operations may interfere
+ * by probing a fileserver when accessing other volumes.
+ */
+ unsigned int probe_seq;
+ unsigned long untried_addrs; /* Addresses we haven't tried yet */
+ struct wait_queue_entry probe_waiter;
+ struct afs_endpoint_state *endpoint_state; /* Endpoint state being monitored */
};
/*
@@ -763,7 +844,7 @@ struct afs_vnode_param {
struct afs_fid fid; /* Fid to access */
struct afs_status_cb scb; /* Returned status and callback promise */
afs_dataversion_t dv_before; /* Data version before the call */
- unsigned int cb_break_before; /* cb_break + cb_s_break before the call */
+ unsigned int cb_break_before; /* cb_break before the call */
u8 dv_delta; /* Expected change in data version */
bool put_vnode:1; /* T if we have a ref on the vnode */
bool need_io_lock:1; /* T if we need the I/O lock on this */
@@ -788,17 +869,17 @@ struct afs_operation {
struct afs_volume *volume; /* Volume being accessed */
struct afs_vnode_param file[2];
struct afs_vnode_param *more_files;
- struct afs_volsync volsync;
+ struct afs_volsync pre_volsync; /* Volsync before op */
+ struct afs_volsync volsync; /* Volsync returned by op */
struct dentry *dentry; /* Dentry to be altered */
struct dentry *dentry_2; /* Second dentry to be altered */
struct timespec64 mtime; /* Modification time to record */
struct timespec64 ctime; /* Change time to set */
+ struct afs_error cumul_error; /* Cumulative error */
short nr_files; /* Number of entries in file[], more_files */
- short error;
unsigned int debug_id;
unsigned int cb_v_break; /* Volume break counter before op */
- unsigned int cb_s_break; /* Server break counter before op */
union {
struct {
@@ -813,12 +894,13 @@ struct afs_operation {
bool need_rehash;
} unlink;
struct {
- struct dentry *rehash;
- struct dentry *tmp;
- bool new_negative;
+ struct dentry *rehash;
+ struct dentry *tmp;
+ unsigned int rename_flags;
+ bool new_negative;
} rename;
struct {
- struct afs_read *req;
+ struct netfs_io_subrequest *subreq;
} fetch;
struct {
afs_lock_type_t type;
@@ -828,7 +910,6 @@ struct afs_operation {
loff_t pos;
loff_t size;
loff_t i_size;
- bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
@@ -843,13 +924,19 @@ struct afs_operation {
};
/* Fileserver iteration state */
- struct afs_addr_cursor ac;
struct afs_server_list *server_list; /* Current server list (pins ref) */
struct afs_server *server; /* Server we're using (ref pinned by server_list) */
+ struct afs_endpoint_state *estate; /* Current endpoint state (doesn't pin ref) */
+ struct afs_server_state *server_states; /* States of the servers involved */
struct afs_call *call;
- unsigned long untried; /* Bitmask of untried servers */
- short index; /* Current server */
- unsigned short nr_iterations; /* Number of server iterations */
+ unsigned long untried_servers; /* Bitmask of untried servers */
+ unsigned long addr_tried; /* Tried addresses */
+ s32 call_abort_code; /* Abort code from single call */
+ short call_error; /* Error from single call */
+ short server_index; /* Current server */
+ short nr_iterations; /* Number of server iterations */
+ signed char addr_index; /* Current address */
+ bool call_responded; /* T if the current address responded */
unsigned int flags;
#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
@@ -865,6 +952,7 @@ struct afs_operation {
#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */
+#define AFS_OPERATION_ASYNC 0x2000 /* Set if should run asynchronously */
};
/*
@@ -890,60 +978,19 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
}
/*
- * We use folio->private to hold the amount of the folio that we've written to,
- * splitting the field into two parts. However, we need to represent a range
- * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
- * exceeds what we can encode.
+ * Directory iteration management.
*/
-#ifdef CONFIG_64BIT
-#define __AFS_FOLIO_PRIV_MASK 0x7fffffffUL
-#define __AFS_FOLIO_PRIV_SHIFT 32
-#define __AFS_FOLIO_PRIV_MMAPPED 0x80000000UL
-#else
-#define __AFS_FOLIO_PRIV_MASK 0x7fffUL
-#define __AFS_FOLIO_PRIV_SHIFT 16
-#define __AFS_FOLIO_PRIV_MMAPPED 0x8000UL
-#endif
-
-static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
-{
- int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
- return (shift > 0) ? shift : 0;
-}
-
-static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
-{
- unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
-
- /* The lower bound is inclusive */
- return x << afs_folio_dirty_resolution(folio);
-}
-
-static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
-{
- unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
-
- /* The upper bound is immediately beyond the region */
- return (x + 1) << afs_folio_dirty_resolution(folio);
-}
-
-static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
-{
- unsigned int res = afs_folio_dirty_resolution(folio);
- from >>= res;
- to = (to - 1) >> res;
- return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
-}
-
-static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
-{
- return priv | __AFS_FOLIO_PRIV_MMAPPED;
-}
-
-static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
-{
- return priv & __AFS_FOLIO_PRIV_MMAPPED;
-}
+struct afs_dir_iter {
+ struct afs_vnode *dvnode;
+ union afs_xdr_dir_block *block;
+ struct folio_queue *fq;
+ unsigned int fpos;
+ int fq_slot;
+ unsigned int loop_check;
+ u8 nr_slots;
+ u8 bucket;
+ unsigned int prev_entry;
+};
#include <trace/events/afs.h>
@@ -951,31 +998,35 @@ static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
/*
* addr_list.c
*/
-static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
-{
- if (alist)
- refcount_inc(&alist->usage);
- return alist;
-}
-extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
- unsigned short,
- unsigned short);
-extern void afs_put_addrlist(struct afs_addr_list *);
+struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason);
+extern struct afs_addr_list *afs_alloc_addrlist(unsigned int nr);
+extern void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason);
extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
const char *, size_t, char,
unsigned short, unsigned short);
+bool afs_addr_list_same(const struct afs_addr_list *a,
+ const struct afs_addr_list *b);
extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
-extern bool afs_iterate_addresses(struct afs_addr_cursor *);
-extern int afs_end_cursor(struct afs_addr_cursor *);
-extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
-extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
+extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr,
+ __be32 xdr, u16 port);
+extern int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *addr,
+ __be32 *xdr, u16 port);
+void afs_set_peer_appdata(struct afs_server *server,
+ struct afs_addr_list *old_alist,
+ struct afs_addr_list *new_alist);
+
+/*
+ * addr_prefs.c
+ */
+int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size);
+void afs_get_address_preferences_rcu(struct afs_net *net, struct afs_addr_list *alist);
+void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alist);
/*
* callback.c
*/
extern void afs_invalidate_mmap_work(struct work_struct *);
-extern void afs_server_init_callback_work(struct work_struct *work);
extern void afs_init_callback_state(struct afs_server *);
extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
@@ -983,13 +1034,15 @@ extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback
static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
{
- return vnode->cb_break + vnode->cb_v_break;
+ return vnode->cb_break + vnode->cb_ro_snapshot + vnode->cb_scrub;
}
static inline bool afs_cb_is_broken(unsigned int cb_break,
const struct afs_vnode *vnode)
{
- return cb_break != (vnode->cb_break + vnode->volume->cb_v_break);
+ return cb_break != (vnode->cb_break +
+ atomic_read(&vnode->volume->cb_ro_snapshot) +
+ atomic_read(&vnode->volume->cb_scrub));
}
/*
@@ -998,16 +1051,26 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
extern int afs_cell_init(struct afs_net *, const char *);
extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned,
enum afs_cell_trace);
-extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
- const char *, bool);
+enum afs_lookup_cell_for {
+ AFS_LOOKUP_CELL_DYNROOT,
+ AFS_LOOKUP_CELL_MOUNTPOINT,
+ AFS_LOOKUP_CELL_DIRECT_MOUNT,
+ AFS_LOOKUP_CELL_PRELOAD,
+ AFS_LOOKUP_CELL_ROOTCELL,
+ AFS_LOOKUP_CELL_ALIAS_CHECK,
+};
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ const char *vllist,
+ enum afs_lookup_cell_for reason,
+ enum afs_cell_trace trace);
extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace);
-extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace);
+void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason);
extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace);
-extern void afs_manage_cells(struct work_struct *);
-extern void afs_cells_timer(struct timer_list *);
+void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs);
extern void __net_exit afs_cell_purge(struct afs_net *);
/*
@@ -1016,6 +1079,19 @@ extern void __net_exit afs_cell_purge(struct afs_net *);
extern bool afs_cm_incoming_call(struct afs_call *);
/*
+ * cm_security.c
+ */
+void afs_process_oob_queue(struct work_struct *work);
+#ifdef CONFIG_RXGK
+int afs_create_token_key(struct afs_net *net, struct socket *socket);
+#else
+static inline int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+ return 0;
+}
+#endif
+
+/*
* dir.c
*/
extern const struct file_operations afs_dir_file_operations;
@@ -1023,15 +1099,34 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ __acquires(&dvnode->validate_lock);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
+int afs_single_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
/*
* dir_edit.c
*/
-extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
+extern void afs_edit_dir_add(struct afs_vnode *, const struct qstr *, struct afs_fid *,
enum afs_edit_dir_reason);
-extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+extern void afs_edit_dir_remove(struct afs_vnode *, const struct qstr *, enum afs_edit_dir_reason);
+void afs_edit_dir_update(struct afs_vnode *vnode, const struct qstr *name,
+ struct afs_vnode *new_dvnode, enum afs_edit_dir_reason why);
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode);
+
+/*
+ * dir_search.c
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name);
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name);
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block);
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+ struct afs_fid *_fid);
+int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
+ struct afs_fid *_fid, afs_dataversion_t *_dir_version);
/*
* dir_silly.c
@@ -1046,35 +1141,23 @@ extern int afs_silly_iput(struct dentry *, struct inode *);
extern const struct inode_operations afs_dynroot_inode_operations;
extern const struct dentry_operations afs_dynroot_dentry_operations;
-extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *);
-extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *);
-extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *);
-extern int afs_dynroot_populate(struct super_block *);
-extern void afs_dynroot_depopulate(struct super_block *);
+struct inode *afs_dynroot_iget_root(struct super_block *sb);
/*
* file.c
*/
extern const struct address_space_operations afs_file_aops;
-extern const struct address_space_operations afs_symlink_aops;
extern const struct inode_operations afs_file_inode_operations;
extern const struct file_operations afs_file_operations;
+extern const struct afs_operation_ops afs_fetch_data_operation;
extern const struct netfs_request_ops afs_req_ops;
extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
extern void afs_put_wb_key(struct afs_wb_key *);
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
-extern struct afs_read *afs_alloc_read(gfp_t);
-extern void afs_put_read(struct afs_read *);
-extern int afs_write_inode(struct inode *, struct writeback_control *);
-
-static inline struct afs_read *afs_get_read(struct afs_read *req)
-{
- refcount_inc(&req->usage);
- return req;
-}
+void afs_fetch_data_async_rx(struct work_struct *work);
+void afs_fetch_data_immediate_cancel(struct afs_call *call);
/*
* flock.c
@@ -1105,15 +1188,16 @@ extern void afs_fs_get_volume_status(struct afs_operation *);
extern void afs_fs_set_lock(struct afs_operation *);
extern void afs_fs_extend_lock(struct afs_operation *);
extern void afs_fs_release_lock(struct afs_operation *);
-extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
- struct afs_addr_cursor *, struct key *);
-extern bool afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
- struct afs_addr_cursor *, struct key *);
+int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
+ struct afs_address *addr, struct key *key);
+bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
+ struct afs_endpoint_state *estate, unsigned int addr_index,
+ struct key *key);
extern void afs_fs_inline_bulk_status(struct afs_operation *);
struct afs_acl {
u32 size;
- u8 data[];
+ u8 data[] __counted_by(size);
};
extern void afs_fs_fetch_acl(struct afs_operation *);
@@ -1125,14 +1209,10 @@ extern void afs_fs_store_acl(struct afs_operation *);
extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *);
extern int afs_put_operation(struct afs_operation *);
extern bool afs_begin_vnode_operation(struct afs_operation *);
+extern void afs_end_vnode_operation(struct afs_operation *op);
extern void afs_wait_for_operation(struct afs_operation *);
extern int afs_do_sync_operation(struct afs_operation *);
-static inline void afs_op_nomem(struct afs_operation *op)
-{
- op->error = -ENOMEM;
-}
-
static inline void afs_op_set_vnode(struct afs_operation *op, unsigned int n,
struct afs_vnode *vnode)
{
@@ -1149,12 +1229,17 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
/*
* fs_probe.c
*/
+struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
+ enum afs_estate_trace where);
+void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where);
extern void afs_fileserver_probe_result(struct afs_call *);
-extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool);
-extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+ struct afs_addr_list *new_alist, struct key *key);
+int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr);
extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
extern void afs_fs_probe_dispatcher(struct work_struct *);
-extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
+int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
+ unsigned long exclude, bool is_intr);
extern void afs_fs_probe_cleanup(struct afs_net *);
/*
@@ -1162,17 +1247,18 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
*/
extern const struct afs_operation_ops afs_fetch_status_operation;
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback);
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
-extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
extern struct inode *afs_root_iget(struct super_block *, struct key *);
-extern bool afs_check_validity(struct afs_vnode *);
-extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *,
+extern int afs_getattr(struct mnt_idmap *idmap, const struct path *,
struct kstat *, u32, unsigned int);
-extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *);
+extern int afs_setattr(struct mnt_idmap *idmap, struct dentry *, struct iattr *);
extern void afs_evict_inode(struct inode *);
extern int afs_drop_inode(struct inode *);
@@ -1225,6 +1311,31 @@ static inline void __afs_stat(atomic_t *s)
extern int afs_abort_to_error(u32);
extern void afs_prioritise_error(struct afs_error *, int, u32);
+static inline void afs_op_nomem(struct afs_operation *op)
+{
+ op->cumul_error.error = -ENOMEM;
+}
+
+static inline int afs_op_error(const struct afs_operation *op)
+{
+ return op->cumul_error.error;
+}
+
+static inline s32 afs_op_abort_code(const struct afs_operation *op)
+{
+ return op->cumul_error.abort_code;
+}
+
+static inline int afs_op_set_error(struct afs_operation *op, int error)
+{
+ return op->cumul_error.error = error;
+}
+
+static inline void afs_op_accumulate_error(struct afs_operation *op, int error, s32 abort_code)
+{
+ afs_prioritise_error(&op->cumul_error, error, abort_code);
+}
+
/*
* mntpt.c
*/
@@ -1255,6 +1366,7 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {}
/*
* rotate.c
*/
+void afs_clear_server_states(struct afs_operation *op);
extern bool afs_select_fileserver(struct afs_operation *);
extern void afs_dump_edestaddrreq(const struct afs_operation *);
@@ -1267,8 +1379,10 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
-extern void afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t);
-extern long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
+void afs_deferred_put_call(struct afs_call *call);
+void afs_make_call(struct afs_call *call, gfp_t gfp);
+void afs_deliver_to_call(struct afs_call *call);
+void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
size_t, size_t);
@@ -1278,15 +1392,41 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
extern int afs_extract_data(struct afs_call *, bool);
extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
+static inline struct afs_call *afs_get_call(struct afs_call *call,
+ enum afs_call_trace why)
+{
+ int r;
+
+ __refcount_inc(&call->ref, &r);
+
+ trace_afs_call(call->debug_id, why, r + 1,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+ return call;
+}
+
+static inline void afs_see_call(struct afs_call *call, enum afs_call_trace why)
+{
+ int r = refcount_read(&call->ref);
+
+ trace_afs_call(call->debug_id, why, r,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+}
+
static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
gfp_t gfp)
{
- op->call = call;
- op->type = call->type;
- call->op = op;
- call->key = op->key;
- call->intr = !(op->flags & AFS_OPERATION_UNINTR);
- afs_make_call(&op->ac, call, gfp);
+ struct afs_addr_list *alist = op->estate->addresses;
+
+ op->call = call;
+ op->type = call->type;
+ call->op = op;
+ call->key = op->key;
+ call->intr = !(op->flags & AFS_OPERATION_UNINTR);
+ call->peer = rxrpc_kernel_get_peer(alist->addrs[op->addr_index].peer);
+ call->service_id = op->server->service_id;
+ afs_make_call(call, gfp);
}
static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
@@ -1387,7 +1527,7 @@ extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
extern struct key *afs_request_key(struct afs_cell *);
extern struct key *afs_request_key_rcu(struct afs_cell *);
extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
-extern int afs_permission(struct user_namespace *, struct inode *, int);
+extern int afs_permission(struct mnt_idmap *, struct inode *, int);
extern void __exit afs_clean_up_permit_cache(void);
/*
@@ -1395,20 +1535,29 @@ extern void __exit afs_clean_up_permit_cache(void);
*/
extern spinlock_t afs_server_peer_lock;
-extern struct afs_server *afs_find_server(struct afs_net *,
- const struct sockaddr_rxrpc *);
-extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
+struct afs_server *afs_find_server(const struct rxrpc_peer *peer);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32);
extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
-extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace);
-extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
-extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace);
+struct afs_server *afs_use_server(struct afs_server *server, bool activate,
+ enum afs_server_trace reason);
+void afs_unuse_server(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason);
+void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason);
extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
-extern void afs_manage_servers(struct work_struct *);
-extern void afs_servers_timer(struct timer_list *);
+void afs_purge_servers(struct afs_cell *cell);
extern void afs_fs_probe_timer(struct timer_list *);
-extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_check_server_record(struct afs_operation *, struct afs_server *);
+void __net_exit afs_wait_for_servers(struct afs_net *net);
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key);
+
+static inline void afs_see_server(struct afs_server *server, enum afs_server_trace trace)
+{
+ int r = refcount_read(&server->ref);
+ int a = atomic_read(&server->active);
+
+ trace_afs_server(server->debug_id, r, a, trace);
+
+}
static inline void afs_inc_servers_outstanding(struct afs_net *net)
{
@@ -1436,10 +1585,14 @@ static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list
}
extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
-extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
- struct afs_vldb_entry *,
- u8);
+struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
+ struct key *key,
+ struct afs_vldb_entry *vldb);
extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
+void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist);
+void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist,
+ struct afs_server_list *old);
+void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist);
/*
* super.c
@@ -1448,13 +1601,24 @@ extern int __init afs_fs_init(void);
extern void afs_fs_exit(void);
/*
+ * validation.c
+ */
+bool afs_check_validity(const struct afs_vnode *vnode);
+int afs_update_volume_state(struct afs_operation *op);
+int afs_validate(struct afs_vnode *vnode, struct key *key);
+
+/*
* vlclient.c
*/
extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
-extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *,
- struct key *, struct afs_vlserver *, unsigned int);
+struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
+ struct afs_addr_list *alist,
+ unsigned int addr_index,
+ struct key *key,
+ struct afs_vlserver *server,
+ unsigned int server_index);
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *);
@@ -1508,36 +1672,27 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern int afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
-extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
+void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason);
extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
/*
* write.c
*/
-#ifdef CONFIG_AFS_FSCACHE
-bool afs_dirty_folio(struct address_space *, struct folio *);
-#else
-#define afs_dirty_folio filemap_dirty_folio
-#endif
-extern int afs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata);
-extern int afs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
-extern int afs_writepage(struct page *, struct writeback_control *);
+void afs_prepare_write(struct netfs_io_subrequest *subreq);
+void afs_issue_write(struct netfs_io_subrequest *subreq);
+void afs_begin_writeback(struct netfs_io_request *wreq);
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream);
extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
extern void afs_prune_wb_keys(struct afs_vnode *);
-int afs_launder_folio(struct folio *);
/*
* xattr.c
*/
-extern const struct xattr_handler *afs_xattr_handlers[];
+extern const struct xattr_handler * const afs_xattr_handlers[];
/*
* yfsclient.c
@@ -1551,6 +1706,9 @@ extern void yfs_fs_remove_dir(struct afs_operation *);
extern void yfs_fs_link(struct afs_operation *);
extern void yfs_fs_symlink(struct afs_operation *);
extern void yfs_fs_rename(struct afs_operation *);
+void yfs_fs_rename_replace(struct afs_operation *op);
+void yfs_fs_rename_noreplace(struct afs_operation *op);
+void yfs_fs_rename_exchange(struct afs_operation *op);
extern void yfs_fs_store_data(struct afs_operation *);
extern void yfs_fs_setattr(struct afs_operation *);
extern void yfs_fs_get_volume_status(struct afs_operation *);
@@ -1596,7 +1754,7 @@ static inline void afs_update_dentry_version(struct afs_operation *op,
struct afs_vnode_param *dir_vp,
struct dentry *dentry)
{
- if (!op->error)
+ if (!op->cumul_error.error)
dentry->d_fsdata =
(void *)(unsigned long)dir_vp->scb.status.data_version;
}
@@ -1635,6 +1793,38 @@ static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
return -EIO;
}
+/*
+ * Set the callback promise on a vnode.
+ */
+static inline void afs_set_cb_promise(struct afs_vnode *vnode, time64_t expires_at,
+ enum afs_cb_promise_trace trace)
+{
+ atomic64_set(&vnode->cb_expires_at, expires_at);
+ trace_afs_cb_promise(vnode, trace);
+}
+
+/*
+ * Clear the callback promise on a vnode, returning true if it was promised.
+ */
+static inline bool afs_clear_cb_promise(struct afs_vnode *vnode,
+ enum afs_cb_promise_trace trace)
+{
+ trace_afs_cb_promise(vnode, trace);
+ return atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE;
+}
+
+/*
+ * Mark a directory as being invalid.
+ */
+static inline void afs_invalidate_dir(struct afs_vnode *dvnode,
+ enum afs_dir_invalid_trace trace)
+{
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ trace_afs_dir_invalid(dvnode, trace);
+ afs_stat_v(dvnode, n_inval);
+ }
+}
+
/*****************************************************************************/
/*
* debug tracing
diff --git a/fs/afs/main.c b/fs/afs/main.c
index eae288c8d40a..e6bb8237db98 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -41,8 +41,6 @@ const char afs_init_sysname[] = "arm_linux26";
const char afs_init_sysname[] = "aarch64_linux26";
#elif defined(CONFIG_X86_32)
const char afs_init_sysname[] = "i386_linux26";
-#elif defined(CONFIG_IA64)
-const char afs_init_sysname[] = "ia64_linux26";
#elif defined(CONFIG_PPC64)
const char afs_init_sysname[] = "ppc64_linux26";
#elif defined(CONFIG_PPC32)
@@ -75,29 +73,21 @@ static int __net_init afs_net_init(struct net *net_ns)
generate_random_uuid((unsigned char *)&net->uuid);
INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+ INIT_WORK(&net->rx_oob_work, afs_process_oob_queue);
mutex_init(&net->socket_mutex);
net->cells = RB_ROOT;
+ idr_init(&net->cells_dyn_ino);
init_rwsem(&net->cells_lock);
- INIT_WORK(&net->cells_manager, afs_manage_cells);
- timer_setup(&net->cells_timer, afs_cells_timer, 0);
-
mutex_init(&net->cells_alias_lock);
mutex_init(&net->proc_cells_lock);
INIT_HLIST_HEAD(&net->proc_cells);
seqlock_init(&net->fs_lock);
- net->fs_servers = RB_ROOT;
INIT_LIST_HEAD(&net->fs_probe_fast);
INIT_LIST_HEAD(&net->fs_probe_slow);
INIT_HLIST_HEAD(&net->fs_proc);
- INIT_HLIST_HEAD(&net->fs_addresses4);
- INIT_HLIST_HEAD(&net->fs_addresses6);
- seqlock_init(&net->fs_addr_lock);
-
- INIT_WORK(&net->fs_manager, afs_manage_servers);
- timer_setup(&net->fs_timer, afs_servers_timer, 0);
INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher);
timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0);
atomic_set(&net->servers_outstanding, 1);
@@ -133,13 +123,14 @@ error_open_socket:
net->live = false;
afs_fs_probe_cleanup(net);
afs_cell_purge(net);
- afs_purge_servers(net);
+ afs_wait_for_servers(net);
error_cell_init:
net->live = false;
afs_proc_cleanup(net);
error_proc:
afs_put_sysnames(net->sysnames);
error_sysnames:
+ idr_destroy(&net->cells_dyn_ino);
net->live = false;
return ret;
}
@@ -154,10 +145,12 @@ static void __net_exit afs_net_exit(struct net *net_ns)
net->live = false;
afs_fs_probe_cleanup(net);
afs_cell_purge(net);
- afs_purge_servers(net);
+ afs_wait_for_servers(net);
afs_close_socket(net);
afs_proc_cleanup(net);
afs_put_sysnames(net->sysnames);
+ idr_destroy(&net->cells_dyn_ino);
+ kfree_rcu(rcu_access_pointer(net->address_prefs), rcu);
}
static struct pernet_operations afs_net_ops = {
@@ -176,13 +169,13 @@ static int __init afs_init(void)
printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
- afs_wq = alloc_workqueue("afs", 0, 0);
+ afs_wq = alloc_workqueue("afs", WQ_PERCPU, 0);
if (!afs_wq)
goto error_afs_wq;
- afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!afs_async_calls)
goto error_async;
- afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
+ afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!afs_lock_manager)
goto error_lockmgr;
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 805328ca5428..c8a7f266080d 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
+#include <crypto/krb5.h>
#include "internal.h"
#include "afs_fs.h"
#include "protocol_uae.h"
@@ -103,7 +104,34 @@ int afs_abort_to_error(u32 abort_code)
case RXKADDATALEN: return -EKEYREJECTED;
case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+ case RXGK_INCONSISTENCY: return -EPROTO;
+ case RXGK_PACKETSHORT: return -EPROTO;
+ case RXGK_BADCHALLENGE: return -EPROTO;
+ case RXGK_SEALEDINCON: return -EKEYREJECTED;
+ case RXGK_NOTAUTH: return -EKEYREJECTED;
+ case RXGK_EXPIRED: return -EKEYEXPIRED;
+ case RXGK_BADLEVEL: return -EKEYREJECTED;
+ case RXGK_BADKEYNO: return -EKEYREJECTED;
+ case RXGK_NOTRXGK: return -EKEYREJECTED;
+ case RXGK_UNSUPPORTED: return -EKEYREJECTED;
+ case RXGK_GSSERROR: return -EKEYREJECTED;
+#ifdef RXGK_BADETYPE
+ case RXGK_BADETYPE: return -ENOPKG;
+#endif
+#ifdef RXGK_BADTOKEN
+ case RXGK_BADTOKEN: return -EKEYREJECTED;
+#endif
+#ifdef RXGK_BADETYPE
+ case RXGK_DATALEN: return -EPROTO;
+#endif
+#ifdef RXGK_BADQOP
+ case RXGK_BADQOP: return -EKEYREJECTED;
+#endif
+
+ case KRB5_PROG_KEYTYPE_NOSUPP: return -ENOPKG;
+
case RXGEN_OPCODE: return -ENOTSUPP;
+ case RX_INVALID_OPERATION: return -ENOTSUPP;
default: return -EREMOTEIO;
}
@@ -116,6 +144,8 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
{
switch (error) {
case 0:
+ e->aborted = false;
+ e->error = 0;
return;
default:
if (e->error == -ETIMEDOUT ||
@@ -161,12 +191,16 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
if (e->responded)
return;
e->error = error;
+ e->aborted = false;
return;
case -ECONNABORTED:
- error = afs_abort_to_error(abort_code);
- fallthrough;
+ e->error = afs_abort_to_error(abort_code);
+ e->aborted = true;
+ e->responded = true;
+ return;
case -ENETRESET: /* Responded, but we seem to have changed address */
+ e->aborted = false;
e->responded = true;
e->error = error;
return;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 97f50e9fd9eb..57c204a3c04e 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -30,7 +30,7 @@ const struct file_operations afs_mntpt_file_operations = {
const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
- .readlink = page_readlink,
+ .readlink = afs_readlink,
.getattr = afs_getattr,
};
@@ -87,7 +87,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ctx->force = true;
}
if (ctx->cell) {
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_mntpt);
ctx->cell = NULL;
}
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
@@ -107,7 +107,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (size > AFS_MAXCELLNAME)
return -ENAMETOOLONG;
- cell = afs_lookup_cell(ctx->net, p, size, NULL, false);
+ cell = afs_lookup_cell(ctx->net, p, size, NULL,
+ AFS_LOOKUP_CELL_MOUNTPOINT,
+ afs_cell_trace_use_lookup_mntpt);
if (IS_ERR(cell)) {
pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
return PTR_ERR(cell);
@@ -118,9 +120,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ctx->volnamesz = sizeof(afs_root_volume) - 1;
} else {
/* read the contents of the AFS special symlink */
- struct page *page;
+ DEFINE_DELAYED_CALL(cleanup);
+ const char *content;
loff_t size = i_size_read(d_inode(mntpt));
- char *buf;
if (src_as->cell)
ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
@@ -128,18 +130,24 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (size < 2 || size > PAGE_SIZE - 1)
return -EINVAL;
- page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ content = afs_get_link(mntpt, d_inode(mntpt), &cleanup);
+ if (IS_ERR(content)) {
+ do_delayed_call(&cleanup);
+ return PTR_ERR(content);
+ }
- buf = kmap(page);
ret = -EINVAL;
- if (buf[size - 1] == '.')
- ret = vfs_parse_fs_string(fc, "source", buf, size - 1);
- kunmap(page);
- put_page(page);
+ if (content[size - 1] == '.')
+ ret = vfs_parse_fs_qstr(fc, "source",
+ &QSTR_LEN(content, size - 1));
+ do_delayed_call(&cleanup);
if (ret < 0)
return ret;
+
+ /* Don't cross a backup volume mountpoint from a backup volume */
+ if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+ ctx->type == AFSVL_BACKVOL)
+ return -ENODEV;
}
return 0;
@@ -183,7 +191,6 @@ struct vfsmount *afs_d_automount(struct path *path)
if (IS_ERR(newmnt))
return newmnt;
- mntget(newmnt); /* prevent immediate expiration */
mnt_set_expiry(newmnt, &afs_vfsmounts);
queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
afs_mntpt_expiry_timeout * HZ);
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 2a0c83d71565..44520549b509 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -122,14 +122,16 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
if (strcmp(buf, "add") == 0) {
struct afs_cell *cell;
- cell = afs_lookup_cell(net, name, strlen(name), args, true);
+ cell = afs_lookup_cell(net, name, strlen(name), args,
+ AFS_LOOKUP_CELL_PRELOAD,
+ afs_cell_trace_use_lookup_add);
if (IS_ERR(cell)) {
ret = PTR_ERR(cell);
goto done;
}
if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
- afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin);
+ afs_unuse_cell(cell, afs_cell_trace_unuse_no_pin);
} else {
goto inval;
}
@@ -147,6 +149,56 @@ inval:
}
/*
+ * Display the list of addr_prefs known to the namespace.
+ */
+static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
+{
+ struct afs_addr_preference_list *preflist;
+ struct afs_addr_preference *pref;
+ struct afs_net *net = afs_seq2net_single(m);
+ union {
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ } addr;
+ unsigned int i;
+ char buf[44]; /* Maximum ipv6 + max subnet is 43 */
+
+ rcu_read_lock();
+ preflist = rcu_dereference(net->address_prefs);
+
+ if (!preflist) {
+ seq_puts(m, "NO PREFS\n");
+ goto out;
+ }
+
+ seq_printf(m, "PROT SUBNET PRIOR (v=%u n=%u/%u/%u)\n",
+ preflist->version, preflist->ipv6_off, preflist->nr, preflist->max_prefs);
+
+ memset(&addr, 0, sizeof(addr));
+
+ for (i = 0; i < preflist->nr; i++) {
+ pref = &preflist->prefs[i];
+
+ addr.sin.sin_family = pref->family;
+ if (pref->family == AF_INET) {
+ memcpy(&addr.sin.sin_addr, &pref->ipv4_addr,
+ sizeof(addr.sin.sin_addr));
+ snprintf(buf, sizeof(buf), "%pISc/%u", &addr.sin, pref->subnet_mask);
+ seq_printf(m, "UDP %-43.43s %5u\n", buf, pref->prio);
+ } else {
+ memcpy(&addr.sin6.sin6_addr, &pref->ipv6_addr,
+ sizeof(addr.sin6.sin6_addr));
+ snprintf(buf, sizeof(buf), "%pISc/%u", &addr.sin6, pref->subnet_mask);
+ seq_printf(m, "UDP %-43.43s %5u\n", buf, pref->prio);
+ }
+ }
+
+out:
+ rcu_read_unlock();
+ return 0;
+}
+
+/*
* Display the name of the current workstation cell.
*/
static int afs_proc_rootcell_show(struct seq_file *m, void *v)
@@ -156,7 +208,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v)
net = afs_seq2net_single(m);
down_read(&net->cells_lock);
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
if (cell)
seq_printf(m, "%s\n", cell->name);
up_read(&net->cells_lock);
@@ -190,7 +242,13 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size)
/* determine command to perform */
_debug("rootcell=%s", buf);
- ret = afs_cell_init(net, buf);
+ ret = -EEXIST;
+ inode_lock(file_inode(file));
+ if (!rcu_access_pointer(net->ws_cell))
+ ret = afs_cell_init(net, buf);
+ else
+ printk("busy\n");
+ inode_unlock(file_inode(file));
out:
_leave(" = %d", ret);
@@ -307,7 +365,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
for (i = 0; i < alist->nr_addrs; i++)
seq_printf(m, " %c %pISpc\n",
alist->preferred == i ? '>' : '-',
- &alist->addrs[i].transport);
+ rxrpc_kernel_remote_addr(alist->addrs[i].peer));
}
seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt);
seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n",
@@ -375,32 +433,51 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = {
*/
static int afs_proc_servers_show(struct seq_file *m, void *v)
{
- struct afs_server *server;
+ struct afs_endpoint_state *estate;
struct afs_addr_list *alist;
+ struct afs_server *server;
+ unsigned long failed;
int i;
if (v == SEQ_START_TOKEN) {
- seq_puts(m, "UUID REF ACT\n");
+ seq_puts(m, "UUID REF ACT CELL\n");
return 0;
}
server = list_entry(v, struct afs_server, proc_link);
- alist = rcu_dereference(server->addresses);
- seq_printf(m, "%pU %3d %3d\n",
+ seq_printf(m, "%pU %3d %3d %s\n",
&server->uuid,
refcount_read(&server->ref),
- atomic_read(&server->active));
- seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n",
- server->flags, server->rtt, server->cb_s_break);
- seq_printf(m, " - probe: last=%d out=%d\n",
- (int)(jiffies - server->probed_at) / HZ,
- atomic_read(&server->probe_outstanding));
- seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n",
- alist->version, alist->responded, alist->failed);
- for (i = 0; i < alist->nr_addrs; i++)
- seq_printf(m, " [%x] %pISpc%s\n",
- i, &alist->addrs[i].transport,
- alist->preferred == i ? "*" : "");
+ atomic_read(&server->active),
+ server->cell->name);
+ seq_printf(m, " - info: fl=%lx rtt=%u\n",
+ server->flags, server->rtt);
+ seq_printf(m, " - probe: last=%d\n",
+ (int)(jiffies - server->probed_at) / HZ);
+
+ estate = rcu_dereference(server->endpoint_state);
+ if (!estate)
+ goto out;
+ failed = estate->failed_set;
+ seq_printf(m, " - ESTATE pq=%x np=%u rsp=%lx f=%lx\n",
+ estate->probe_seq, atomic_read(&estate->nr_probing),
+ estate->responsive_set, estate->failed_set);
+
+ alist = estate->addresses;
+ seq_printf(m, " - ALIST v=%u ap=%u\n",
+ alist->version, alist->addr_pref_version);
+ for (i = 0; i < alist->nr_addrs; i++) {
+ const struct afs_address *addr = &alist->addrs[i];
+
+ seq_printf(m, " [%x] %pISpc%s rtt=%d err=%d p=%u\n",
+ i, rxrpc_kernel_remote_addr(addr->peer),
+ alist->preferred == i ? "*" :
+ test_bit(i, &failed) ? "!" : "",
+ rxrpc_kernel_get_srtt(addr->peer),
+ addr->last_error, addr->prio);
+ }
+
+out:
return 0;
}
@@ -681,7 +758,11 @@ int afs_proc_init(struct afs_net *net)
&afs_proc_sysname_ops,
afs_proc_sysname_write,
sizeof(struct seq_net_private),
- NULL))
+ NULL) ||
+ !proc_create_net_single_write("addr_prefs", 0644, p,
+ afs_proc_addr_prefs_show,
+ afs_proc_addr_prefs_write,
+ NULL))
goto error_tree;
net->proc_afs = p;
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index e4cd89c44c46..b2f06c1917c2 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -50,6 +50,9 @@ enum YFS_FS_Operations {
YFSREMOVEACL = 64171,
YFSREMOVEFILE2 = 64173,
YFSSTOREOPAQUEACL2 = 64174,
+ YFSRENAME_REPLACE = 64176,
+ YFSRENAME_NOREPLACE = 64177,
+ YFSRENAME_EXCHANGE = 64187,
YFSINLINEBULKSTATUS = 64536, /* YFS Fetch multiple file statuses with errors */
YFSFETCHDATA64 = 64537, /* YFS Fetch file data */
YFSSTOREDATA64 = 64538, /* YFS Store file data */
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index a840c3588ebb..6a4e7da10fc4 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -13,6 +13,19 @@
#include <linux/sched/signal.h>
#include "internal.h"
#include "afs_fs.h"
+#include "protocol_uae.h"
+
+void afs_clear_server_states(struct afs_operation *op)
+{
+ unsigned int i;
+
+ if (op->server_states) {
+ for (i = 0; i < op->server_list->nr_servers; i++)
+ afs_put_endpoint_state(op->server_states[i].endpoint_state,
+ afs_estate_trace_put_server_state);
+ kfree(op->server_states);
+ }
+}
/*
* Begin iteration through a server list, starting with the vnode's last used
@@ -25,14 +38,41 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
void *cb_server;
int i;
+ trace_afs_rotate(op, afs_rotate_trace_start, 0);
+
read_lock(&op->volume->servers_lock);
op->server_list = afs_get_serverlist(
rcu_dereference_protected(op->volume->servers,
lockdep_is_held(&op->volume->servers_lock)));
read_unlock(&op->volume->servers_lock);
- op->untried = (1UL << op->server_list->nr_servers) - 1;
- op->index = READ_ONCE(op->server_list->preferred);
+ op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
+ GFP_KERNEL);
+ if (!op->server_states) {
+ afs_op_nomem(op);
+ trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
+ return false;
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < op->server_list->nr_servers; i++) {
+ struct afs_endpoint_state *estate;
+ struct afs_server_state *s = &op->server_states[i];
+
+ server = op->server_list->servers[i].server;
+ estate = rcu_dereference(server->endpoint_state);
+ s->endpoint_state = afs_get_endpoint_state(estate,
+ afs_estate_trace_get_server_state);
+ s->probe_seq = estate->probe_seq;
+ s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
+ init_waitqueue_entry(&s->probe_waiter, current);
+ afs_get_address_preferences(op->net, estate->addresses);
+ }
+ rcu_read_unlock();
+
+
+ op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
+ op->server_index = -1;
cb_server = vnode->cb_server;
if (cb_server) {
@@ -40,7 +80,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
for (i = 0; i < op->server_list->nr_servers; i++) {
server = op->server_list->servers[i].server;
if (server == cb_server) {
- op->index = i;
+ op->server_index = i;
goto found_interest;
}
}
@@ -50,7 +90,8 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
* and have to return an error.
*/
if (op->flags & AFS_OPERATION_CUR_ONLY) {
- op->error = -ESTALE;
+ afs_op_set_error(op, -ESTALE);
+ trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
return false;
}
@@ -58,7 +99,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
write_seqlock(&vnode->cb_lock);
ASSERTCMP(cb_server, ==, vnode->cb_server);
vnode->cb_server = NULL;
- if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server))
vnode->cb_break++;
write_sequnlock(&vnode->cb_lock);
}
@@ -70,7 +111,7 @@ found_interest:
/*
* Post volume busy note.
*/
-static void afs_busy(struct afs_volume *volume, u32 abort_code)
+static void afs_busy(struct afs_operation *op, u32 abort_code)
{
const char *m;
@@ -81,7 +122,8 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
default: m = "busy"; break;
}
- pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
+ pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
+ op->volume->vid, op->volume->name, &op->server->uuid, m);
}
/*
@@ -89,10 +131,11 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
*/
static bool afs_sleep_and_retry(struct afs_operation *op)
{
+ trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
if (!(op->flags & AFS_OPERATION_UNINTR)) {
msleep_interruptible(1000);
if (signal_pending(current)) {
- op->error = -ERESTARTSYS;
+ afs_op_set_error(op, -ERESTARTSYS);
return false;
}
} else {
@@ -111,62 +154,105 @@ bool afs_select_fileserver(struct afs_operation *op)
struct afs_addr_list *alist;
struct afs_server *server;
struct afs_vnode *vnode = op->file[0].vnode;
- struct afs_error e;
- u32 rtt;
- int error = op->ac.error, i;
+ unsigned long set, failed;
+ s32 abort_code = op->call_abort_code;
+ int best_prio = 0;
+ int error = op->call_error, addr_index, i, j;
- _enter("%lx[%d],%lx[%d],%d,%d",
- op->untried, op->index,
- op->ac.tried, op->ac.index,
- error, op->ac.abort_code);
+ op->nr_iterations++;
+
+ _enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
+ op->debug_id, op->nr_iterations, op->volume->vid,
+ op->server_index, op->untried_servers,
+ op->addr_index, op->addr_tried,
+ error, abort_code);
if (op->flags & AFS_OPERATION_STOP) {
+ trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
_leave(" = f [stopped]");
return false;
}
- op->nr_iterations++;
-
- /* Evaluate the result of the previous operation, if there was one. */
- switch (error) {
- case SHRT_MAX:
+ if (op->nr_iterations == 0)
goto start;
+ WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
+ trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
+
+ /* Evaluate the result of the previous operation, if there was one. */
+ switch (op->call_error) {
case 0:
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
+ op->cumul_error.responded = true;
+
+ /* We succeeded, but we may need to redo the op from another
+ * server if we're looking at a set of RO volumes where some of
+ * the servers have not yet been brought up to date lest we
+ * regress the data. We only switch to the new version once
+ * >=50% of the servers are updated.
+ */
+ error = afs_update_volume_state(op);
+ if (error != 0) {
+ if (error == 1) {
+ afs_sleep_and_retry(op);
+ goto restart_from_beginning;
+ }
+ afs_op_set_error(op, error);
+ goto failed;
+ }
+ fallthrough;
default:
/* Success or local failure. Stop. */
- op->error = error;
+ afs_op_set_error(op, error);
op->flags |= AFS_OPERATION_STOP;
+ trace_afs_rotate(op, afs_rotate_trace_stop, error);
_leave(" = f [okay/local %d]", error);
return false;
case -ECONNABORTED:
/* The far side rejected the operation on some grounds. This
* might involve the server being busy or the volume having been moved.
+ *
+ * Note that various V* errors should not be sent to a cache manager
+ * by a fileserver as they should be translated to more modern UAE*
+ * errors instead. IBM AFS and OpenAFS fileservers, however, do leak
+ * these abort codes.
*/
- switch (op->ac.abort_code) {
+ trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
+ op->cumul_error.responded = true;
+ switch (abort_code) {
case VNOVOL:
/* This fileserver doesn't know about the volume.
* - May indicate that the VL is wrong - retry once and compare
* the results.
* - May indicate that the fileserver couldn't attach to the vol.
+ * - The volume might have been temporarily removed so that it can
+ * be replaced by a volume restore. "vos" might have ended one
+ * transaction and has yet to create the next.
+ * - The volume might not be blessed or might not be in-service
+ * (administrative action).
*/
if (op->flags & AFS_OPERATION_VNOVOL) {
- op->error = -EREMOTEIO;
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
goto next_server;
}
write_lock(&op->volume->servers_lock);
- op->server_list->vnovol_mask |= 1 << op->index;
+ op->server_list->vnovol_mask |= 1 << op->server_index;
write_unlock(&op->volume->servers_lock);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
error = afs_check_volume_status(op->volume, op);
- if (error < 0)
- goto failed_set_error;
+ if (error < 0) {
+ afs_op_set_error(op, error);
+ goto failed;
+ }
if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
- op->error = -ENOMEDIUM;
+ afs_op_set_error(op, -ENOMEDIUM);
goto failed;
}
@@ -174,7 +260,7 @@ bool afs_select_fileserver(struct afs_operation *op)
* it's the fileserver having trouble.
*/
if (rcu_access_pointer(op->volume->servers) == op->server_list) {
- op->error = -EREMOTEIO;
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
goto next_server;
}
@@ -183,50 +269,99 @@ bool afs_select_fileserver(struct afs_operation *op)
_leave(" = t [vnovol]");
return true;
- case VSALVAGE: /* TODO: Should this return an error or iterate? */
case VVOLEXISTS:
- case VNOSERVICE:
case VONLINE:
- case VDISKFULL:
- case VOVERQUOTA:
- op->error = afs_abort_to_error(op->ac.abort_code);
+ /* These should not be returned from the fileserver. */
+ pr_warn("Fileserver returned unexpected abort %d\n",
+ abort_code);
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
goto next_server;
+ case VNOSERVICE:
+ /* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
+ * if the volume was neither in-service nor administratively
+ * blessed. All usage was replaced by VNOVOL because AFS 3.1 and
+ * earlier cache managers did not handle VNOSERVICE and assumed
+ * it was the client OSes errno 105.
+ *
+ * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
+ * fileserver idle dead time error which was sent in place of
+ * RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
+ * fileserver took too long to send a reply to the client.
+ * RX_CALL_TIMEOUT would have caused the cache manager to mark the
+ * server down whereas VNOSERVICE since AFS 3.2 would cause cache
+ * manager to temporarily (up to 15 minutes) mark the volume
+ * instance as unusable.
+ *
+ * The idle dead logic resulted in cache inconsistency since a
+ * state changing call that the cache manager assumed was dead
+ * could still be processed to completion by the fileserver. This
+ * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
+ * returned. However, many 1.4.8 through 1.6.24 fileservers are
+ * still in existence.
+ *
+ * AuriStorFS fileservers have never returned VNOSERVICE.
+ *
+ * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
+ */
+ case RX_CALL_TIMEOUT:
+ afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
+ goto next_server;
+
+ case VSALVAGING: /* This error should not be leaked to cache managers
+ * but is from OpenAFS demand attach fileservers.
+ * It should be treated as an alias for VOFFLINE.
+ */
+ case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
case VOFFLINE:
- if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
- afs_busy(op->volume, op->ac.abort_code);
- clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+ /* The volume is in use by the volserver or another volume utility
+ * for an operation that might alter the contents. The volume is
+ * expected to come back but it might take a long time (could be
+ * days).
+ */
+ if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags)) {
+ afs_busy(op, abort_code);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
}
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
- op->error = -EADV;
- goto failed;
- }
- if (op->flags & AFS_OPERATION_CUR_ONLY) {
- op->error = -ESTALE;
+ afs_op_set_error(op, -EADV);
goto failed;
}
goto busy;
- case VSALVAGING:
- case VRESTARTING:
+ case VRESTARTING: /* The fileserver is either shutting down or starting up. */
case VBUSY:
- /* Retry after going round all the servers unless we
- * have a file lock we need to maintain.
+ /* The volume is in use by the volserver or another volume
+ * utility for an operation that is not expected to alter the
+ * contents of the volume. VBUSY does not need to be returned
+ * for a ROVOL or BACKVOL bound to an ITBusy volserver
+ * transaction. The fileserver is permitted to continue serving
+ * content from ROVOLs and BACKVOLs during an ITBusy transaction
+ * because the content will not change. However, many fileserver
+ * releases do return VBUSY for ROVOL and BACKVOL instances under
+ * many circumstances.
+ *
+ * Retry after going round all the servers unless we have a file
+ * lock we need to maintain.
*/
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
- op->error = -EBUSY;
+ afs_op_set_error(op, -EBUSY);
goto failed;
}
- if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
- afs_busy(op->volume, op->ac.abort_code);
- clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
+ if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags)) {
+ afs_busy(op, abort_code);
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
}
busy:
if (op->flags & AFS_OPERATION_CUR_ONLY) {
if (!afs_sleep_and_retry(op))
goto failed;
- /* Retry with same server & address */
+ /* Retry with same server & address */
_leave(" = t [vbusy]");
return true;
}
@@ -243,7 +378,7 @@ bool afs_select_fileserver(struct afs_operation *op)
* honour, just in case someone sets up a loop.
*/
if (op->flags & AFS_OPERATION_VMOVED) {
- op->error = -EREMOTEIO;
+ afs_op_set_error(op, -EREMOTEIO);
goto failed;
}
op->flags |= AFS_OPERATION_VMOVED;
@@ -251,8 +386,10 @@ bool afs_select_fileserver(struct afs_operation *op)
set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
error = afs_check_volume_status(op->volume, op);
- if (error < 0)
- goto failed_set_error;
+ if (error < 0) {
+ afs_op_set_error(op, error);
+ goto failed;
+ }
/* If the server list didn't change, then the VLDB is
* out of sync with the fileservers. This is hopefully
@@ -264,22 +401,60 @@ bool afs_select_fileserver(struct afs_operation *op)
* TODO: Retry a few times with sleeps.
*/
if (rcu_access_pointer(op->volume->servers) == op->server_list) {
- op->error = -ENOMEDIUM;
+ afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
goto failed;
}
goto restart_from_beginning;
+ case UAEIO:
+ case VIO:
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
+ if (op->volume->type != AFSVL_RWVOL)
+ goto next_server;
+ goto failed;
+
+ case VDISKFULL:
+ case UAENOSPC:
+ /* The partition is full. Only applies to RWVOLs.
+ * Translate locally and return ENOSPC.
+ * No replicas to failover to.
+ */
+ afs_op_set_error(op, -ENOSPC);
+ goto failed_but_online;
+
+ case VOVERQUOTA:
+ case UAEDQUOT:
+ /* Volume is full. Only applies to RWVOLs.
+ * Translate locally and return EDQUOT.
+ * No replicas to failover to.
+ */
+ afs_op_set_error(op, -EDQUOT);
+ goto failed_but_online;
+
+ case RX_INVALID_OPERATION:
+ case RXGEN_OPCODE:
+ /* Handle downgrading to an older operation. */
+ afs_op_set_error(op, -ENOTSUPP);
+ if (op->flags & AFS_OPERATION_DOWNGRADE) {
+ op->flags &= ~AFS_OPERATION_DOWNGRADE;
+ goto go_again;
+ }
+ goto failed_but_online;
+
default:
- clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
- clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
- op->error = afs_abort_to_error(op->ac.abort_code);
+ afs_op_accumulate_error(op, error, abort_code);
+ failed_but_online:
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
goto failed;
}
case -ETIMEDOUT:
case -ETIME:
- if (op->error != -EDESTADDRREQ)
+ if (afs_op_error(op) != -EDESTADDRREQ)
goto iterate_address;
fallthrough;
case -ERFKILL:
@@ -289,7 +464,7 @@ bool afs_select_fileserver(struct afs_operation *op)
case -EHOSTDOWN:
case -ECONNREFUSED:
_debug("no conn");
- op->error = error;
+ afs_op_accumulate_error(op, error, 0);
goto iterate_address;
case -ENETRESET:
@@ -298,24 +473,31 @@ bool afs_select_fileserver(struct afs_operation *op)
fallthrough;
case -ECONNRESET:
_debug("call reset");
- op->error = error;
+ afs_op_set_error(op, error);
goto failed;
}
restart_from_beginning:
+ trace_afs_rotate(op, afs_rotate_trace_restart, 0);
_debug("restart");
- afs_end_cursor(&op->ac);
+ op->estate = NULL;
op->server = NULL;
+ afs_clear_server_states(op);
+ op->server_states = NULL;
afs_put_serverlist(op->net, op->server_list);
op->server_list = NULL;
start:
_debug("start");
+ ASSERTCMP(op->estate, ==, NULL);
/* See if we need to do an update of the volume record. Note that the
* volume may have moved or even have been deleted.
*/
error = afs_check_volume_status(op->volume, op);
- if (error < 0)
- goto failed_set_error;
+ trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
+ if (error < 0) {
+ afs_op_set_error(op, error);
+ goto failed;
+ }
if (!afs_start_fs_iteration(op, vnode))
goto failed;
@@ -323,52 +505,85 @@ start:
_debug("__ VOL %llx __", op->volume->vid);
pick_server:
- _debug("pick [%lx]", op->untried);
+ _debug("pick [%lx]", op->untried_servers);
+ ASSERTCMP(op->estate, ==, NULL);
- error = afs_wait_for_fs_probes(op->server_list, op->untried);
- if (error < 0)
- goto failed_set_error;
+ error = afs_wait_for_fs_probes(op, op->server_states,
+ !(op->flags & AFS_OPERATION_UNINTR));
+ switch (error) {
+ case 0: /* No untried responsive servers and no outstanding probes */
+ trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
+ goto no_more_servers;
+ case 1: /* Got a response */
+ trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
+ break;
+ case 2: /* Probe data superseded */
+ trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
+ goto restart_from_beginning;
+ default:
+ trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
+ afs_op_set_error(op, error);
+ goto failed;
+ }
- /* Pick the untried server with the lowest RTT. If we have outstanding
- * callbacks, we stick with the server we're already using if we can.
+ /* Pick the untried server with the highest priority untried endpoint.
+ * If we have outstanding callbacks, we stick with the server we're
+ * already using if we can.
*/
if (op->server) {
- _debug("server %u", op->index);
- if (test_bit(op->index, &op->untried))
+ _debug("server %u", op->server_index);
+ if (test_bit(op->server_index, &op->untried_servers))
goto selected_server;
op->server = NULL;
_debug("no server");
}
- op->index = -1;
- rtt = U32_MAX;
+ rcu_read_lock();
+ op->server_index = -1;
+ best_prio = -1;
for (i = 0; i < op->server_list->nr_servers; i++) {
- struct afs_server *s = op->server_list->servers[i].server;
+ struct afs_endpoint_state *es;
+ struct afs_server_entry *se = &op->server_list->servers[i];
+ struct afs_addr_list *sal;
+ struct afs_server *s = se->server;
- if (!test_bit(i, &op->untried) ||
+ if (!test_bit(i, &op->untried_servers) ||
+ test_bit(AFS_SE_EXCLUDED, &se->flags) ||
!test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
continue;
- if (s->probe.rtt < rtt) {
- op->index = i;
- rtt = s->probe.rtt;
+ es = op->server_states[i].endpoint_state;
+ sal = es->addresses;
+
+ afs_get_address_preferences_rcu(op->net, sal);
+ for (j = 0; j < sal->nr_addrs; j++) {
+ if (es->failed_set & (1 << j))
+ continue;
+ if (!sal->addrs[j].peer)
+ continue;
+ if (sal->addrs[j].prio > best_prio) {
+ op->server_index = i;
+ best_prio = sal->addrs[j].prio;
+ }
}
}
+ rcu_read_unlock();
- if (op->index == -1)
+ if (op->server_index == -1)
goto no_more_servers;
selected_server:
- _debug("use %d", op->index);
- __clear_bit(op->index, &op->untried);
+ trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
+ _debug("use %d prio %u", op->server_index, best_prio);
+ __clear_bit(op->server_index, &op->untried_servers);
/* We're starting on a different fileserver from the list. We need to
* check it, create a callback intercept, find its address list and
* probe its capabilities before we use it.
*/
- ASSERTCMP(op->ac.alist, ==, NULL);
- server = op->server_list->servers[op->index].server;
+ ASSERTCMP(op->estate, ==, NULL);
+ server = op->server_list->servers[op->server_index].server;
- if (!afs_check_server_record(op, server))
+ if (!afs_check_server_record(op, server, op->key))
goto failed;
_debug("USING SERVER: %pU", &server->uuid);
@@ -377,58 +592,81 @@ selected_server:
op->server = server;
if (vnode->cb_server != server) {
vnode->cb_server = server;
- vnode->cb_s_break = server->cb_s_break;
- vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
- vnode->cb_v_break = vnode->volume->cb_v_break;
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change);
}
- read_lock(&server->fs_lock);
- alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->fs_lock));
- afs_get_addrlist(alist);
- read_unlock(&server->fs_lock);
-
retry_server:
- memset(&op->ac, 0, sizeof(op->ac));
-
- if (!op->ac.alist)
- op->ac.alist = alist;
- else
- afs_put_addrlist(alist);
-
- op->ac.index = -1;
+ op->addr_tried = 0;
+ op->addr_index = -1;
iterate_address:
- ASSERT(op->ac.alist);
/* Iterate over the current server's address list to try and find an
* address on which it will respond to us.
*/
- if (!afs_iterate_addresses(&op->ac))
- goto out_of_addresses;
+ op->estate = op->server_states[op->server_index].endpoint_state;
+ set = READ_ONCE(op->estate->responsive_set);
+ failed = READ_ONCE(op->estate->failed_set);
+ _debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
+ set &= ~(failed | op->addr_tried);
+ trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
+ if (!set)
+ goto wait_for_more_probe_results;
+
+ alist = op->estate->addresses;
+ best_prio = -1;
+ addr_index = 0;
+ for (i = 0; i < alist->nr_addrs; i++) {
+ if (!(set & (1 << i)))
+ continue;
+ if (alist->addrs[i].prio > best_prio) {
+ addr_index = i;
+ best_prio = alist->addrs[i].prio;
+ }
+ }
- _debug("address [%u] %u/%u %pISp",
- op->index, op->ac.index, op->ac.alist->nr_addrs,
- &op->ac.alist->addrs[op->ac.index].transport);
+ alist->preferred = addr_index;
+
+ op->addr_index = addr_index;
+ set_bit(addr_index, &op->addr_tried);
+ _debug("address [%u] %u/%u %pISp",
+ op->server_index, addr_index, alist->nr_addrs,
+ rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
+go_again:
+ op->volsync.creation = TIME64_MIN;
+ op->volsync.update = TIME64_MIN;
+ op->call_responded = false;
_leave(" = t");
return true;
-out_of_addresses:
+wait_for_more_probe_results:
+ error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
+ !(op->flags & AFS_OPERATION_UNINTR));
+ if (error == 1)
+ goto iterate_address;
+ if (!error)
+ goto restart_from_beginning;
+
/* We've now had a failure to respond on all of a server's addresses -
* immediately probe them again and consider retrying the server.
*/
+ trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
afs_probe_fileserver(op->net, op->server);
if (op->flags & AFS_OPERATION_RETRY_SERVER) {
- alist = op->ac.alist;
- error = afs_wait_for_one_fs_probe(
- op->server, !(op->flags & AFS_OPERATION_UNINTR));
+ error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
+ !(op->flags & AFS_OPERATION_UNINTR));
switch (error) {
- case 0:
+ case 1:
op->flags &= ~AFS_OPERATION_RETRY_SERVER;
+ trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
goto retry_server;
+ case 0:
+ trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
+ goto restart_from_beginning;
case -ERESTARTSYS:
- goto failed_set_error;
+ afs_op_set_error(op, error);
+ goto failed;
case -ETIME:
case -EDESTADDRREQ:
goto next_server;
@@ -436,34 +674,38 @@ out_of_addresses:
}
next_server:
+ trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
_debug("next");
- afs_end_cursor(&op->ac);
+ op->estate = NULL;
goto pick_server;
no_more_servers:
/* That's all the servers poked to no good effect. Try again if some
* of them were busy.
*/
- if (op->flags & AFS_OPERATION_VBUSY)
+ trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
+ if (op->flags & AFS_OPERATION_VBUSY) {
+ afs_sleep_and_retry(op);
+ op->flags &= ~AFS_OPERATION_VBUSY;
goto restart_from_beginning;
+ }
- e.error = -EDESTADDRREQ;
- e.responded = false;
+ rcu_read_lock();
for (i = 0; i < op->server_list->nr_servers; i++) {
- struct afs_server *s = op->server_list->servers[i].server;
+ struct afs_endpoint_state *estate;
- afs_prioritise_error(&e, READ_ONCE(s->probe.error),
- s->probe.abort_code);
+ estate = op->server_states[i].endpoint_state;
+ error = READ_ONCE(estate->error);
+ if (error < 0)
+ afs_op_accumulate_error(op, error, estate->abort_code);
}
+ rcu_read_unlock();
- error = e.error;
-
-failed_set_error:
- op->error = error;
failed:
+ trace_afs_rotate(op, afs_rotate_trace_failed, 0);
op->flags |= AFS_OPERATION_STOP;
- afs_end_cursor(&op->ac);
- _leave(" = f [failed %d]", op->error);
+ op->estate = NULL;
+ _leave(" = f [failed %d]", afs_op_error(op));
return false;
}
@@ -482,37 +724,40 @@ void afs_dump_edestaddrreq(const struct afs_operation *op)
rcu_read_lock();
pr_notice("EDESTADDR occurred\n");
- pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
+ pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
op->file[0].cb_break_before,
- op->file[1].cb_break_before, op->flags, op->error);
- pr_notice("FC: ut=%lx ix=%d ni=%u\n",
- op->untried, op->index, op->nr_iterations);
+ op->file[1].cb_break_before, op->flags, op->cumul_error.error);
+ pr_notice("OP: ut=%lx ix=%d ni=%u\n",
+ op->untried_servers, op->server_index, op->nr_iterations);
+ pr_notice("OP: call er=%d ac=%d r=%u\n",
+ op->call_error, op->call_abort_code, op->call_responded);
if (op->server_list) {
const struct afs_server_list *sl = op->server_list;
- pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
- sl->nr_servers, sl->preferred, sl->vnovol_mask);
+
+ pr_notice("FC: SL nr=%u vnov=%hx\n",
+ sl->nr_servers, sl->vnovol_mask);
for (i = 0; i < sl->nr_servers; i++) {
const struct afs_server *s = sl->servers[i].server;
+ const struct afs_endpoint_state *e =
+ rcu_dereference(s->endpoint_state);
+ const struct afs_addr_list *a = e->addresses;
+
pr_notice("FC: server fl=%lx av=%u %pU\n",
s->flags, s->addr_version, &s->uuid);
- if (s->addresses) {
- const struct afs_addr_list *a =
- rcu_dereference(s->addresses);
+ pr_notice("FC: - pq=%x R=%lx F=%lx\n",
+ e->probe_seq, e->responsive_set, e->failed_set);
+ if (a) {
pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
a->version,
a->nr_ipv4, a->nr_addrs, a->max_addrs,
a->preferred);
- pr_notice("FC: - R=%lx F=%lx\n",
- a->responded, a->failed);
- if (a == op->ac.alist)
+ if (a == e->addresses)
pr_notice("FC: - current\n");
}
}
}
- pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
- op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
- op->ac.responded, op->ac.nr_iterations);
+ pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
rcu_read_unlock();
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 7817e2b860e5..bf0e4ea0aafd 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,13 +18,23 @@
struct workqueue_struct *afs_async_calls;
+static void afs_deferred_free_worker(struct work_struct *work);
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID);
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob);
static int afs_deliver_cm_op_id(struct afs_call *);
+static const struct rxrpc_kernel_ops afs_rxrpc_callback_ops = {
+ .notify_new_call = afs_rx_new_call,
+ .discard_new_call = afs_rx_discard_new_call,
+ .user_attach_call = afs_rx_attach,
+ .notify_oob = afs_rx_notify_oob,
+};
+
/* asynchronous incoming call initial processing */
static const struct afs_call_type afs_RXCMxxxx = {
.name = "CB.xxxx",
@@ -48,6 +58,7 @@ int afs_open_socket(struct afs_net *net)
goto error_1;
socket->sk->sk_allocation = GFP_NOFS;
+ socket->sk->sk_user_data = net;
/* bind the callback manager's address to make this a server socket */
memset(&srx, 0, sizeof(srx));
@@ -63,16 +74,24 @@ int afs_open_socket(struct afs_net *net)
if (ret < 0)
goto error_2;
- ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ ret = rxrpc_sock_set_manage_response(socket->sk, true);
+ if (ret < 0)
+ goto error_2;
+
+ ret = afs_create_token_key(net, socket);
+ if (ret < 0)
+ pr_err("Couldn't create RxGK CM key: %d\n", ret);
+
+ ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx));
if (ret == -EADDRINUSE) {
srx.transport.sin6.sin6_port = 0;
- ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx));
}
if (ret < 0)
goto error_2;
srx.srx_service = YFS_CM_SERVICE;
- ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx));
if (ret < 0)
goto error_2;
@@ -83,8 +102,7 @@ int afs_open_socket(struct afs_net *net)
* it sends back to us.
*/
- rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
- afs_rx_discard_new_call);
+ rxrpc_kernel_set_notifications(socket, &afs_rxrpc_callback_ops);
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
@@ -124,7 +142,9 @@ void afs_close_socket(struct afs_net *net)
kernel_sock_shutdown(net->socket, SHUT_RDWR);
flush_workqueue(afs_async_calls);
+ net->socket->sk->sk_user_data = NULL;
sock_release(net->socket);
+ key_put(net->fs_cm_token_key);
_debug("dework");
_leave("");
@@ -148,7 +168,9 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->net = net;
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
refcount_set(&call->ref, 1);
- INIT_WORK(&call->async_work, afs_process_async_call);
+ INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call);
+ INIT_WORK(&call->work, call->type->work);
+ INIT_WORK(&call->free_work, afs_deferred_free_worker);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
call->iter = &call->def_iter;
@@ -159,6 +181,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
return call;
}
+static void afs_free_call(struct afs_call *call)
+{
+ struct afs_net *net = call->net;
+ int o;
+
+ ASSERT(!work_pending(&call->async_work));
+
+ rxrpc_kernel_put_peer(call->peer);
+
+ if (call->rxcall) {
+ rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
+ rxrpc_kernel_put_call(net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->type->destructor)
+ call->type->destructor(call);
+
+ afs_unuse_server_notime(call->net, call->server, afs_server_trace_unuse_call);
+ kfree(call->request);
+
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
+ __builtin_return_address(0));
+ kfree(call);
+
+ o = atomic_dec_return(&net->nr_outstanding_calls);
+ if (o == 0)
+ wake_up_var(&net->nr_outstanding_calls);
+}
+
/*
* Dispose of a reference on a call.
*/
@@ -173,43 +225,34 @@ void afs_put_call(struct afs_call *call)
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
+ if (zero)
+ afs_free_call(call);
+}
- if (zero) {
- ASSERT(!work_pending(&call->async_work));
- ASSERT(call->type->name != NULL);
-
- if (call->rxcall) {
- rxrpc_kernel_end_call(net->socket, call->rxcall);
- call->rxcall = NULL;
- }
- if (call->type->destructor)
- call->type->destructor(call);
-
- afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
- afs_put_addrlist(call->alist);
- kfree(call->request);
-
- trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
- __builtin_return_address(0));
- kfree(call);
+static void afs_deferred_free_worker(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, free_work);
- o = atomic_dec_return(&net->nr_outstanding_calls);
- if (o == 0)
- wake_up_var(&net->nr_outstanding_calls);
- }
+ afs_free_call(call);
}
-static struct afs_call *afs_get_call(struct afs_call *call,
- enum afs_call_trace why)
+/*
+ * Dispose of a reference on a call, deferring the cleanup to a workqueue
+ * to avoid lock recursion.
+ */
+void afs_deferred_put_call(struct afs_call *call)
{
- int r;
-
- __refcount_inc(&call->ref, &r);
+ struct afs_net *net = call->net;
+ unsigned int debug_id = call->debug_id;
+ bool zero;
+ int r, o;
- trace_afs_call(call->debug_id, why, r + 1,
- atomic_read(&call->net->nr_outstanding_calls),
+ zero = __refcount_dec_and_test(&call->ref, &r);
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
- return call;
+ if (zero)
+ schedule_work(&call->free_work);
}
/*
@@ -218,8 +261,6 @@ static struct afs_call *afs_get_call(struct afs_call *call,
static void afs_queue_call_work(struct afs_call *call)
{
if (call->type->work) {
- INIT_WORK(&call->work, call->type->work);
-
afs_get_call(call, afs_call_trace_work);
if (!queue_work(afs_wq, &call->work))
afs_put_call(call);
@@ -293,9 +334,8 @@ static void afs_notify_end_request_tx(struct sock *sock,
* Initiate a call and synchronously queue up the parameters for dispatch. Any
* error is stored into the call struct, which the caller must check for.
*/
-void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
+void afs_make_call(struct afs_call *call, gfp_t gfp)
{
- struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
@@ -303,7 +343,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
s64 tx_total_len;
int ret;
- _enter(",{%pISp},", &srx->transport);
+ _enter(",{%pISp+%u},", rxrpc_kernel_remote_addr(call->peer), call->service_id);
ASSERT(call->type != NULL);
ASSERT(call->type->name != NULL);
@@ -312,8 +352,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
call, call->type->name, key_serial(call->key),
atomic_read(&call->net->nr_outstanding_calls));
- call->addr_ix = ac->index;
- call->alist = afs_get_addrlist(ac->alist);
+ trace_afs_make_call(call);
/* Work out the length we're going to transmit. This is awkward for
* calls such as FS.StoreData where there's an extra injection of data
@@ -332,12 +371,15 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
}
/* create a call */
- rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
+ rxcall = rxrpc_kernel_begin_call(call->net->socket, call->peer, call->key,
(unsigned long)call,
- tx_total_len, gfp,
+ tx_total_len,
+ call->max_lifespan,
+ gfp,
(call->async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter),
+ call->service_id,
call->upgrade,
(call->intr ? RXRPC_PREINTERRUPTIBLE :
RXRPC_UNINTERRUPTIBLE),
@@ -349,10 +391,6 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
}
call->rxcall = rxcall;
-
- if (call->max_lifespan)
- rxrpc_kernel_set_max_life(call->net->socket, rxcall,
- call->max_lifespan);
call->issue_time = ktime_get_real();
/* send the request */
@@ -391,46 +429,50 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
/* Note that at this point, we may have received the reply or an abort
* - and an asynchronous call may already have completed.
*
- * afs_wait_for_call_to_complete(call, ac)
+ * afs_wait_for_call_to_complete(call)
* must be called to synchronously clean up.
*/
return;
error_do_abort:
- if (ret != -ECONNABORTED) {
+ if (ret != -ECONNABORTED)
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret,
afs_abort_send_data_error);
- } else {
+ if (call->async) {
+ afs_see_call(call, afs_call_trace_async_abort);
+ return;
+ }
+
+ if (ret == -ECONNABORTED) {
len = 0;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
rxrpc_kernel_recv_data(call->net->socket, rxcall,
&msg.msg_iter, &len, false,
&call->abort_code, &call->service_id);
- ac->abort_code = call->abort_code;
- ac->responded = true;
+ call->responded = true;
}
call->error = ret;
trace_afs_call_done(call);
error_kill_call:
- if (call->type->done)
- call->type->done(call);
+ if (call->async)
+ afs_see_call(call, afs_call_trace_async_kill);
+ if (call->type->immediate_cancel)
+ call->type->immediate_cancel(call);
/* We need to dispose of the extra ref we grabbed for an async call.
* The call, however, might be queued on afs_async_calls and we need to
* make sure we don't get any more notifications that might requeue it.
*/
- if (call->rxcall) {
- rxrpc_kernel_end_call(call->net->socket, call->rxcall);
- call->rxcall = NULL;
- }
+ if (call->rxcall)
+ rxrpc_kernel_shutdown_call(call->net->socket, call->rxcall);
if (call->async) {
if (cancel_work_sync(&call->async_work))
afs_put_call(call);
- afs_put_call(call);
+ afs_set_call_complete(call, ret, 0);
}
- ac->error = ret;
+ call->error = ret;
call->state = AFS_CALL_COMPLETE;
_leave(" = %d", ret);
}
@@ -464,14 +506,14 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort)
max = m + 1;
pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n",
msg, call->type->name,
- &call->alist->addrs[call->addr_ix].transport);
+ rxrpc_kernel_remote_addr(call->peer));
}
}
/*
* deliver messages to a call
*/
-static void afs_deliver_to_call(struct afs_call *call)
+void afs_deliver_to_call(struct afs_call *call)
{
enum afs_call_state state;
size_t len;
@@ -511,6 +553,7 @@ static void afs_deliver_to_call(struct afs_call *call)
ret = -EBADMSG;
switch (ret) {
case 0:
+ call->responded = true;
afs_queue_call_work(call);
if (state == AFS_CALL_CL_PROC_REPLY) {
if (call->op)
@@ -525,9 +568,11 @@ static void afs_deliver_to_call(struct afs_call *call)
goto out;
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+ call->responded = true;
afs_log_error(call, call->abort_code);
goto done;
case -ENOTSUPP:
+ call->responded = true;
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret,
@@ -569,55 +614,50 @@ local_abort:
abort_code = 0;
call_complete:
afs_set_call_complete(call, ret, remote_abort);
- state = AFS_CALL_COMPLETE;
goto done;
}
/*
- * Wait synchronously for a call to complete and clean up the call struct.
+ * Wait synchronously for a call to complete.
*/
-long afs_wait_for_call_to_complete(struct afs_call *call,
- struct afs_addr_cursor *ac)
+void afs_wait_for_call_to_complete(struct afs_call *call)
{
- long ret;
bool rxrpc_complete = false;
- DECLARE_WAITQUEUE(myself, current);
-
_enter("");
- ret = call->error;
- if (ret < 0)
- goto out;
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
+ DECLARE_WAITQUEUE(myself, current);
+
+ add_wait_queue(&call->waitq, &myself);
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /* deliver any messages that are in the queue */
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
+ call->need_attention) {
+ call->need_attention = false;
+ __set_current_state(TASK_RUNNING);
+ afs_deliver_to_call(call);
+ continue;
+ }
- add_wait_queue(&call->waitq, &myself);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- /* deliver any messages that are in the queue */
- if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
- call->need_attention) {
- call->need_attention = false;
- __set_current_state(TASK_RUNNING);
- afs_deliver_to_call(call);
- continue;
- }
+ if (afs_check_call_state(call, AFS_CALL_COMPLETE))
+ break;
- if (afs_check_call_state(call, AFS_CALL_COMPLETE))
- break;
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
+ /* rxrpc terminated the call. */
+ rxrpc_complete = true;
+ break;
+ }
- if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
- /* rxrpc terminated the call. */
- rxrpc_complete = true;
- break;
+ schedule();
}
- schedule();
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
}
- remove_wait_queue(&call->waitq, &myself);
- __set_current_state(TASK_RUNNING);
-
if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
if (rxrpc_complete) {
afs_set_call_complete(call, call->error, call->abort_code);
@@ -630,29 +670,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
afs_set_call_complete(call, -EINTR, 0);
}
}
-
- spin_lock_bh(&call->state_lock);
- ac->abort_code = call->abort_code;
- ac->error = call->error;
- spin_unlock_bh(&call->state_lock);
-
- ret = ac->error;
- switch (ret) {
- case 0:
- ret = call->ret0;
- call->ret0 = 0;
-
- fallthrough;
- case -ECONNABORTED:
- ac->responded = true;
- break;
- }
-
-out:
- _debug("call complete");
- afs_put_call(call);
- _leave(" = %p", (void *)ret);
- return ret;
}
/*
@@ -668,7 +685,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
- * wake up an asynchronous call
+ * Wake up an asynchronous call. The caller is holding the call notify
+ * spinlock around this, so we can't call afs_put_call().
*/
static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long call_user_ID)
@@ -685,7 +703,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
__builtin_return_address(0));
if (!queue_work(afs_async_calls, &call->async_work))
- afs_put_call(call);
+ afs_deferred_put_call(call);
}
}
@@ -739,7 +757,6 @@ void afs_charge_preallocation(struct work_struct *work)
if (rxrpc_kernel_charge_accept(net->socket,
afs_wake_up_async_call,
- afs_rx_attach,
(unsigned long)call,
GFP_KERNEL,
call->debug_id) < 0)
@@ -767,8 +784,14 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long user_call_ID)
{
+ struct afs_call *call = (struct afs_call *)user_call_ID;
struct afs_net *net = afs_sock2net(sk);
+ call->peer = rxrpc_kernel_get_call_peer(sk->sk_socket, call->rxcall);
+ call->server = afs_find_server(call->peer);
+ if (!call->server)
+ trace_afs_cm_no_server(call, rxrpc_kernel_remote_srx(call->peer));
+
queue_work(afs_wq, &net->charge_preallocation_work);
}
@@ -795,9 +818,14 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
if (!afs_cm_incoming_call(call))
return -ENOTSUPP;
+ call->security_ix = rxrpc_kernel_query_call_security(call->rxcall,
+ &call->service_id,
+ &call->enctype);
+
trace_afs_cb_call(call);
+ call->work.func = call->type->work;
- /* pass responsibility for the remainer of this message off to the
+ /* pass responsibility for the remainder of this message off to the
* cache manager op */
return call->type->deliver(call);
}
@@ -946,3 +974,13 @@ noinline int afs_protocol_error(struct afs_call *call,
call->unmarshalling_error = true;
return -EBADMSG;
}
+
+/*
+ * Wake up OOB notification processing.
+ */
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob)
+{
+ struct afs_net *net = sk->sk_user_data;
+
+ schedule_work(&net->rx_oob_work);
+}
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 7c6a63a30394..55ddce94af03 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -16,6 +16,31 @@
static DEFINE_HASHTABLE(afs_permits_cache, 10);
static DEFINE_SPINLOCK(afs_permits_lock);
+static DEFINE_MUTEX(afs_key_lock);
+
+/*
+ * Allocate a key to use as a placeholder for anonymous user security.
+ */
+static int afs_alloc_anon_key(struct afs_cell *cell)
+{
+ struct key *key;
+
+ mutex_lock(&afs_key_lock);
+ key = cell->anonymous_key;
+ if (!key) {
+ key = rxrpc_get_null_key(cell->key_desc);
+ if (!IS_ERR(key))
+ cell->anonymous_key = key;
+ }
+ mutex_unlock(&afs_key_lock);
+
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ _debug("anon key %p{%x}",
+ cell->anonymous_key, key_serial(cell->anonymous_key));
+ return 0;
+}
/*
* get a key
@@ -23,11 +48,12 @@ static DEFINE_SPINLOCK(afs_permits_lock);
struct key *afs_request_key(struct afs_cell *cell)
{
struct key *key;
+ int ret;
- _enter("{%x}", key_serial(cell->anonymous_key));
+ _enter("{%s}", cell->key_desc);
- _debug("key %s", cell->anonymous_key->description);
- key = request_key_net(&key_type_rxrpc, cell->anonymous_key->description,
+ _debug("key %s", cell->key_desc);
+ key = request_key_net(&key_type_rxrpc, cell->key_desc,
cell->net->net, NULL);
if (IS_ERR(key)) {
if (PTR_ERR(key) != -ENOKEY) {
@@ -35,6 +61,12 @@ struct key *afs_request_key(struct afs_cell *cell)
return key;
}
+ if (!cell->anonymous_key) {
+ ret = afs_alloc_anon_key(cell);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ }
+
/* act as anonymous user */
_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
return key_get(cell->anonymous_key);
@@ -52,11 +84,10 @@ struct key *afs_request_key_rcu(struct afs_cell *cell)
{
struct key *key;
- _enter("{%x}", key_serial(cell->anonymous_key));
+ _enter("{%s}", cell->key_desc);
- _debug("key %s", cell->anonymous_key->description);
- key = request_key_net_rcu(&key_type_rxrpc,
- cell->anonymous_key->description,
+ _debug("key %s", cell->key_desc);
+ key = request_key_net_rcu(&key_type_rxrpc, cell->key_desc,
cell->net->net);
if (IS_ERR(key)) {
if (PTR_ERR(key) != -ENOKEY) {
@@ -65,6 +96,8 @@ struct key *afs_request_key_rcu(struct afs_cell *cell)
}
/* act as anonymous user */
+ if (!cell->anonymous_key)
+ return NULL; /* Need to allocate */
_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
return key_get(cell->anonymous_key);
} else {
@@ -395,7 +428,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int afs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -408,7 +441,7 @@ int afs_permission(struct user_namespace *mnt_userns, struct inode *inode,
if (mask & MAY_NOT_BLOCK) {
key = afs_request_key_rcu(vnode->volume->cell);
- if (IS_ERR(key))
+ if (IS_ERR_OR_NULL(key))
return -ECHILD;
ret = -ECHILD;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index b5237206eac3..c4428ebddb1d 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -14,210 +14,103 @@
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static atomic_t afs_server_debug_id;
-static struct afs_server *afs_maybe_use_server(struct afs_server *,
- enum afs_server_trace);
static void __afs_put_server(struct afs_net *, struct afs_server *);
+static void afs_server_timer(struct timer_list *timer);
+static void afs_server_destroyer(struct work_struct *work);
/*
* Find a server by one of its addresses.
*/
-struct afs_server *afs_find_server(struct afs_net *net,
- const struct sockaddr_rxrpc *srx)
+struct afs_server *afs_find_server(const struct rxrpc_peer *peer)
{
- const struct afs_addr_list *alist;
- struct afs_server *server = NULL;
- unsigned int i;
- int seq = 0, diff;
+ struct afs_server *server = (struct afs_server *)rxrpc_kernel_get_peer_data(peer);
- rcu_read_lock();
-
- do {
- if (server)
- afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
- server = NULL;
- read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
-
- if (srx->transport.family == AF_INET6) {
- const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
- hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
- alist = rcu_dereference(server->addresses);
- for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
- b = &alist->addrs[i].transport.sin6;
- diff = ((u16 __force)a->sin6_port -
- (u16 __force)b->sin6_port);
- if (diff == 0)
- diff = memcmp(&a->sin6_addr,
- &b->sin6_addr,
- sizeof(struct in6_addr));
- if (diff == 0)
- goto found;
- }
- }
- } else {
- const struct sockaddr_in *a = &srx->transport.sin, *b;
- hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
- alist = rcu_dereference(server->addresses);
- for (i = 0; i < alist->nr_ipv4; i++) {
- b = &alist->addrs[i].transport.sin;
- diff = ((u16 __force)a->sin_port -
- (u16 __force)b->sin_port);
- if (diff == 0)
- diff = ((u32 __force)a->sin_addr.s_addr -
- (u32 __force)b->sin_addr.s_addr);
- if (diff == 0)
- goto found;
- }
- }
- }
-
- server = NULL;
- continue;
- found:
- server = afs_maybe_use_server(server, afs_server_trace_get_by_addr);
-
- } while (need_seqretry(&net->fs_addr_lock, seq));
-
- done_seqretry(&net->fs_addr_lock, seq);
-
- rcu_read_unlock();
- return server;
+ if (!server)
+ return NULL;
+ return afs_use_server(server, false, afs_server_trace_use_cm_call);
}
/*
- * Look up a server by its UUID and mark it active.
+ * Look up a server by its UUID and mark it active. The caller must hold
+ * cell->fs_lock.
*/
-struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
+static struct afs_server *afs_find_server_by_uuid(struct afs_cell *cell, const uuid_t *uuid)
{
- struct afs_server *server = NULL;
+ struct afs_server *server;
struct rb_node *p;
- int diff, seq = 0;
+ int diff;
_enter("%pU", uuid);
- do {
- /* Unfortunately, rbtree walking doesn't give reliable results
- * under just the RCU read lock, so we have to check for
- * changes.
- */
- if (server)
- afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
- server = NULL;
-
- read_seqbegin_or_lock(&net->fs_lock, &seq);
-
- p = net->fs_servers.rb_node;
- while (p) {
- server = rb_entry(p, struct afs_server, uuid_rb);
-
- diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
- if (diff < 0) {
- p = p->rb_left;
- } else if (diff > 0) {
- p = p->rb_right;
- } else {
- afs_use_server(server, afs_server_trace_get_by_uuid);
- break;
- }
+ p = cell->fs_servers.rb_node;
+ while (p) {
+ server = rb_entry(p, struct afs_server, uuid_rb);
- server = NULL;
+ diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
+ if (diff < 0) {
+ p = p->rb_left;
+ } else if (diff > 0) {
+ p = p->rb_right;
+ } else {
+ if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags))
+ return NULL; /* Need a write lock */
+ afs_use_server(server, true, afs_server_trace_use_by_uuid);
+ return server;
}
- } while (need_seqretry(&net->fs_lock, seq));
-
- done_seqretry(&net->fs_lock, seq);
+ }
- _leave(" = %p", server);
- return server;
+ return NULL;
}
/*
- * Install a server record in the namespace tree. If there's a clash, we stick
- * it into a list anchored on whichever afs_server struct is actually in the
- * tree.
+ * Install a server record in the cell tree. The caller must hold an exclusive
+ * lock on cell->fs_lock.
*/
static struct afs_server *afs_install_server(struct afs_cell *cell,
- struct afs_server *candidate)
+ struct afs_server **candidate)
{
- const struct afs_addr_list *alist;
- struct afs_server *server, *next;
+ struct afs_server *server;
struct afs_net *net = cell->net;
struct rb_node **pp, *p;
int diff;
_enter("%p", candidate);
- write_seqlock(&net->fs_lock);
-
/* Firstly install the server in the UUID lookup tree */
- pp = &net->fs_servers.rb_node;
+ pp = &cell->fs_servers.rb_node;
p = NULL;
while (*pp) {
p = *pp;
_debug("- consider %p", p);
server = rb_entry(p, struct afs_server, uuid_rb);
- diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
- if (diff < 0) {
+ diff = memcmp(&(*candidate)->uuid, &server->uuid, sizeof(uuid_t));
+ if (diff < 0)
pp = &(*pp)->rb_left;
- } else if (diff > 0) {
+ else if (diff > 0)
pp = &(*pp)->rb_right;
- } else {
- if (server->cell == cell)
- goto exists;
-
- /* We have the same UUID representing servers in
- * different cells. Append the new server to the list.
- */
- for (;;) {
- next = rcu_dereference_protected(
- server->uuid_next,
- lockdep_is_held(&net->fs_lock.lock));
- if (!next)
- break;
- server = next;
- }
- rcu_assign_pointer(server->uuid_next, candidate);
- candidate->uuid_prev = server;
- server = candidate;
- goto added_dup;
- }
+ else
+ goto exists;
}
- server = candidate;
+ server = *candidate;
+ *candidate = NULL;
rb_link_node(&server->uuid_rb, p, pp);
- rb_insert_color(&server->uuid_rb, &net->fs_servers);
+ rb_insert_color(&server->uuid_rb, &cell->fs_servers);
+ write_seqlock(&net->fs_lock);
hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
+ write_sequnlock(&net->fs_lock);
-added_dup:
- write_seqlock(&net->fs_addr_lock);
- alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&net->fs_addr_lock.lock));
-
- /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
- * it in the IPv4 and/or IPv6 reverse-map lists.
- *
- * TODO: For speed we want to use something other than a flat list
- * here; even sorting the list in terms of lowest address would help a
- * bit, but anything we might want to do gets messy and memory
- * intensive.
- */
- if (alist->nr_ipv4 > 0)
- hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
- if (alist->nr_addrs > alist->nr_ipv4)
- hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
-
- write_sequnlock(&net->fs_addr_lock);
+ afs_get_cell(cell, afs_cell_trace_get_server);
exists:
- afs_get_server(server, afs_server_trace_get_install);
- write_sequnlock(&net->fs_lock);
+ afs_use_server(server, true, afs_server_trace_use_install);
return server;
}
/*
- * Allocate a new server record and mark it active.
+ * Allocate a new server record and mark it as active but uncreated.
*/
-static struct afs_server *afs_alloc_server(struct afs_cell *cell,
- const uuid_t *uuid,
- struct afs_addr_list *alist)
+static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *uuid)
{
struct afs_server *server;
struct afs_net *net = cell->net;
@@ -226,49 +119,50 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
if (!server)
- goto enomem;
+ return NULL;
refcount_set(&server->ref, 1);
- atomic_set(&server->active, 1);
+ atomic_set(&server->active, 0);
+ __set_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
server->debug_id = atomic_inc_return(&afs_server_debug_id);
- RCU_INIT_POINTER(server->addresses, alist);
- server->addr_version = alist->version;
server->uuid = *uuid;
rwlock_init(&server->fs_lock);
- INIT_WORK(&server->initcb_work, afs_server_init_callback_work);
+ INIT_WORK(&server->destroyer, &afs_server_destroyer);
+ timer_setup(&server->timer, afs_server_timer, 0);
+ INIT_LIST_HEAD(&server->volumes);
init_waitqueue_head(&server->probe_wq);
+ mutex_init(&server->cm_token_lock);
INIT_LIST_HEAD(&server->probe_link);
+ INIT_HLIST_NODE(&server->proc_link);
spin_lock_init(&server->probe_lock);
server->cell = cell;
server->rtt = UINT_MAX;
+ server->service_id = FS_SERVICE;
+ server->probe_counter = 1;
+ server->probed_at = jiffies - LONG_MAX / 2;
afs_inc_servers_outstanding(net);
- trace_afs_server(server->debug_id, 1, 1, afs_server_trace_alloc);
_leave(" = %p", server);
return server;
-
-enomem:
- _leave(" = NULL [nomem]");
- return NULL;
}
/*
* Look up an address record for a server
*/
-static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
- struct key *key, const uuid_t *uuid)
+static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_server *server,
+ struct key *key)
{
struct afs_vl_cursor vc;
struct afs_addr_list *alist = NULL;
int ret;
ret = -ERESTARTSYS;
- if (afs_begin_vlserver_operation(&vc, cell, key)) {
+ if (afs_begin_vlserver_operation(&vc, server->cell, key)) {
while (afs_select_vlserver(&vc)) {
if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
- alist = afs_yfsvl_get_endpoints(&vc, uuid);
+ alist = afs_yfsvl_get_endpoints(&vc, &server->uuid);
else
- alist = afs_vl_get_addrs_u(&vc, uuid);
+ alist = afs_vl_get_addrs_u(&vc, &server->uuid);
}
ret = afs_end_vlserver_operation(&vc);
@@ -278,72 +172,122 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
}
/*
- * Get or create a fileserver record.
+ * Get or create a fileserver record and return it with an active-use count on
+ * it.
*/
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
const uuid_t *uuid, u32 addr_version)
{
- struct afs_addr_list *alist;
- struct afs_server *server, *candidate;
+ struct afs_addr_list *alist = NULL;
+ struct afs_server *server, *candidate = NULL;
+ bool creating = false;
+ int ret;
_enter("%p,%pU", cell->net, uuid);
- server = afs_find_server_by_uuid(cell->net, uuid);
+ down_read(&cell->fs_lock);
+ server = afs_find_server_by_uuid(cell, uuid);
+ /* Won't see servers marked uncreated. */
+ up_read(&cell->fs_lock);
+
if (server) {
+ timer_delete_sync(&server->timer);
+ if (test_bit(AFS_SERVER_FL_CREATING, &server->flags))
+ goto wait_for_creation;
if (server->addr_version != addr_version)
set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
return server;
}
- alist = afs_vl_lookup_addrs(cell, key, uuid);
- if (IS_ERR(alist))
- return ERR_CAST(alist);
-
- candidate = afs_alloc_server(cell, uuid, alist);
+ candidate = afs_alloc_server(cell, uuid);
if (!candidate) {
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_server_oom);
return ERR_PTR(-ENOMEM);
}
- server = afs_install_server(cell, candidate);
- if (server != candidate) {
- afs_put_addrlist(alist);
+ down_write(&cell->fs_lock);
+ server = afs_install_server(cell, &candidate);
+ if (test_bit(AFS_SERVER_FL_CREATING, &server->flags)) {
+ /* We need to wait for creation to complete. */
+ up_write(&cell->fs_lock);
+ goto wait_for_creation;
+ }
+ if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+ set_bit(AFS_SERVER_FL_CREATING, &server->flags);
+ clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+ creating = true;
+ }
+ up_write(&cell->fs_lock);
+ timer_delete_sync(&server->timer);
+
+ /* If we get to create the server, we look up the addresses and then
+ * immediately dispatch an asynchronous probe to each interface on the
+ * fileserver. This will make sure the repeat-probing service is
+ * started.
+ */
+ if (creating) {
+ alist = afs_vl_lookup_addrs(server, key);
+ if (IS_ERR(alist)) {
+ ret = PTR_ERR(alist);
+ goto create_failed;
+ }
+
+ ret = afs_fs_probe_fileserver(cell->net, server, alist, key);
+ if (ret)
+ goto create_failed;
+
+ clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags);
+ }
+
+out:
+ afs_put_addrlist(alist, afs_alist_trace_put_server_create);
+ if (candidate) {
+ kfree(rcu_access_pointer(server->endpoint_state));
kfree(candidate);
- } else {
- /* Immediately dispatch an asynchronous probe to each interface
- * on the fileserver. This will make sure the repeat-probing
- * service is started.
- */
- afs_fs_probe_fileserver(cell->net, server, key, true);
+ afs_dec_servers_outstanding(cell->net);
+ }
+ return server ?: ERR_PTR(ret);
+
+wait_for_creation:
+ afs_see_server(server, afs_server_trace_wait_create);
+ wait_on_bit(&server->flags, AFS_SERVER_FL_CREATING, TASK_UNINTERRUPTIBLE);
+ if (test_bit_acquire(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+ /* Barrier: read flag before error */
+ ret = READ_ONCE(server->create_error);
+ afs_put_server(cell->net, server, afs_server_trace_unuse_create_fail);
+ server = NULL;
+ goto out;
}
- return server;
-}
+ ret = 0;
+ goto out;
-/*
- * Set the server timer to fire after a given delay, assuming it's not already
- * set for an earlier time.
- */
-static void afs_set_server_timer(struct afs_net *net, time64_t delay)
-{
- if (net->live) {
- afs_inc_servers_outstanding(net);
- if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
- afs_dec_servers_outstanding(net);
+create_failed:
+ down_write(&cell->fs_lock);
+
+ WRITE_ONCE(server->create_error, ret);
+ smp_wmb(); /* Barrier: set error before flag. */
+ set_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+
+ clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags);
+
+ if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+ clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+ creating = true;
}
+ afs_unuse_server(cell->net, server, afs_server_trace_unuse_create_fail);
+ server = NULL;
+
+ up_write(&cell->fs_lock);
+ goto out;
}
/*
- * Server management timer. We have an increment on fs_outstanding that we
- * need to pass along to the work item.
+ * Set/reduce a server's timer.
*/
-void afs_servers_timer(struct timer_list *timer)
+static void afs_set_server_timer(struct afs_server *server, unsigned int delay_secs)
{
- struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
-
- _enter("");
- if (!queue_work(afs_wq, &net->fs_manager))
- afs_dec_servers_outstanding(net);
+ mod_timer(&server->timer, jiffies + delay_secs * HZ);
}
/*
@@ -362,32 +306,20 @@ struct afs_server *afs_get_server(struct afs_server *server,
}
/*
- * Try to get a reference on a server object.
+ * Get an active count on a server object and maybe remove from the inactive
+ * list.
*/
-static struct afs_server *afs_maybe_use_server(struct afs_server *server,
- enum afs_server_trace reason)
-{
- unsigned int a;
- int r;
-
- if (!__refcount_inc_not_zero(&server->ref, &r))
- return NULL;
-
- a = atomic_inc_return(&server->active);
- trace_afs_server(server->debug_id, r + 1, a, reason);
- return server;
-}
-
-/*
- * Get an active count on a server object.
- */
-struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason)
+struct afs_server *afs_use_server(struct afs_server *server, bool activate,
+ enum afs_server_trace reason)
{
unsigned int a;
int r;
__refcount_inc(&server->ref, &r);
a = atomic_inc_return(&server->active);
+ if (a == 1 && activate &&
+ !test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+ timer_delete(&server->timer);
trace_afs_server(server->debug_id, r + 1, a, reason);
return server;
@@ -399,13 +331,14 @@ struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_tra
void afs_put_server(struct afs_net *net, struct afs_server *server,
enum afs_server_trace reason)
{
- unsigned int a, debug_id = server->debug_id;
+ unsigned int a, debug_id;
bool zero;
int r;
if (!server)
return;
+ debug_id = server->debug_id;
a = atomic_read(&server->active);
zero = __refcount_dec_and_test(&server->ref, &r);
trace_afs_server(debug_id, r - 1, a, reason);
@@ -420,13 +353,16 @@ void afs_put_server(struct afs_net *net, struct afs_server *server,
void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
enum afs_server_trace reason)
{
- if (server) {
- unsigned int active = atomic_dec_return(&server->active);
+ if (!server)
+ return;
- if (active == 0)
- afs_set_server_timer(net, afs_server_gc_delay);
- afs_put_server(net, server, reason);
+ if (atomic_dec_and_test(&server->active)) {
+ if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) ||
+ READ_ONCE(server->cell->state) >= AFS_CELL_REMOVING)
+ schedule_work(&server->destroyer);
}
+
+ afs_put_server(net, server, reason);
}
/*
@@ -435,10 +371,22 @@ void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
void afs_unuse_server(struct afs_net *net, struct afs_server *server,
enum afs_server_trace reason)
{
- if (server) {
- server->unuse_time = ktime_get_real_seconds();
- afs_unuse_server_notime(net, server, reason);
+ if (!server)
+ return;
+
+ if (atomic_dec_and_test(&server->active)) {
+ if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) &&
+ READ_ONCE(server->cell->state) < AFS_CELL_REMOVING) {
+ time64_t unuse_time = ktime_get_real_seconds();
+
+ server->unuse_time = unuse_time;
+ afs_set_server_timer(server, afs_server_gc_delay);
+ } else {
+ schedule_work(&server->destroyer);
+ }
}
+
+ afs_put_server(net, server, reason);
}
static void afs_server_rcu(struct rcu_head *rcu)
@@ -447,7 +395,10 @@ static void afs_server_rcu(struct rcu_head *rcu)
trace_afs_server(server->debug_id, refcount_read(&server->ref),
atomic_read(&server->active), afs_server_trace_free);
- afs_put_addrlist(rcu_access_pointer(server->addresses));
+ afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state),
+ afs_estate_trace_put_server);
+ afs_put_cell(server->cell, afs_cell_trace_put_server);
+ kfree(server->cm_rxgk_appdata.data);
kfree(server);
}
@@ -459,173 +410,126 @@ static void __afs_put_server(struct afs_net *net, struct afs_server *server)
static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server)
{
- struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
- struct afs_addr_cursor ac = {
- .alist = alist,
- .index = alist->preferred,
- .error = 0,
- };
-
- afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ struct afs_endpoint_state *estate = rcu_access_pointer(server->endpoint_state);
+ struct afs_addr_list *alist = estate->addresses;
+
+ afs_fs_give_up_all_callbacks(net, server, &alist->addrs[alist->preferred], NULL);
}
/*
- * destroy a dead server
+ * Check to see if the server record has expired.
*/
-static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
+static bool afs_has_server_expired(const struct afs_server *server)
{
- if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
- afs_give_up_callbacks(net, server);
+ time64_t expires_at;
- flush_work(&server->initcb_work);
- afs_put_server(net, server, afs_server_trace_destroy);
+ if (atomic_read(&server->active))
+ return false;
+
+ if (server->cell->net->live ||
+ server->cell->state >= AFS_CELL_REMOVING) {
+ trace_afs_server(server->debug_id, refcount_read(&server->ref),
+ 0, afs_server_trace_purging);
+ return true;
+ }
+
+ expires_at = server->unuse_time;
+ if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
+ !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
+ expires_at += afs_server_gc_delay;
+
+ return ktime_get_real_seconds() > expires_at;
}
/*
- * Garbage collect any expired servers.
+ * Remove a server record from it's parent cell's database.
*/
-static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
+static bool afs_remove_server_from_cell(struct afs_server *server)
{
- struct afs_server *server, *next, *prev;
- int active;
-
- while ((server = gc_list)) {
- gc_list = server->gc_next;
-
- write_seqlock(&net->fs_lock);
-
- active = atomic_read(&server->active);
- if (active == 0) {
- trace_afs_server(server->debug_id, refcount_read(&server->ref),
- active, afs_server_trace_gc);
- next = rcu_dereference_protected(
- server->uuid_next, lockdep_is_held(&net->fs_lock.lock));
- prev = server->uuid_prev;
- if (!prev) {
- /* The one at the front is in the tree */
- if (!next) {
- rb_erase(&server->uuid_rb, &net->fs_servers);
- } else {
- rb_replace_node_rcu(&server->uuid_rb,
- &next->uuid_rb,
- &net->fs_servers);
- next->uuid_prev = NULL;
- }
- } else {
- /* This server is not at the front */
- rcu_assign_pointer(prev->uuid_next, next);
- if (next)
- next->uuid_prev = prev;
- }
-
- list_del(&server->probe_link);
- hlist_del_rcu(&server->proc_link);
- if (!hlist_unhashed(&server->addr4_link))
- hlist_del_rcu(&server->addr4_link);
- if (!hlist_unhashed(&server->addr6_link))
- hlist_del_rcu(&server->addr6_link);
- }
- write_sequnlock(&net->fs_lock);
+ struct afs_cell *cell = server->cell;
+
+ down_write(&cell->fs_lock);
- if (active == 0)
- afs_destroy_server(net, server);
+ if (!afs_has_server_expired(server)) {
+ up_write(&cell->fs_lock);
+ return false;
}
+
+ set_bit(AFS_SERVER_FL_EXPIRED, &server->flags);
+ _debug("expire %pU %u", &server->uuid, atomic_read(&server->active));
+ afs_see_server(server, afs_server_trace_see_expired);
+ rb_erase(&server->uuid_rb, &cell->fs_servers);
+ up_write(&cell->fs_lock);
+ return true;
}
-/*
- * Manage the records of servers known to be within a network namespace. This
- * includes garbage collecting unused servers.
- *
- * Note also that we were given an increment on net->servers_outstanding by
- * whoever queued us that we need to deal with before returning.
- */
-void afs_manage_servers(struct work_struct *work)
+static void afs_server_destroyer(struct work_struct *work)
{
- struct afs_net *net = container_of(work, struct afs_net, fs_manager);
- struct afs_server *gc_list = NULL;
- struct rb_node *cursor;
- time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
- bool purging = !net->live;
+ struct afs_endpoint_state *estate;
+ struct afs_server *server = container_of(work, struct afs_server, destroyer);
+ struct afs_net *net = server->cell->net;
- _enter("");
+ afs_see_server(server, afs_server_trace_see_destroyer);
- /* Trawl the server list looking for servers that have expired from
- * lack of use.
- */
- read_seqlock_excl(&net->fs_lock);
-
- for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
- struct afs_server *server =
- rb_entry(cursor, struct afs_server, uuid_rb);
- int active = atomic_read(&server->active);
+ if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+ return;
- _debug("manage %pU %u", &server->uuid, active);
+ if (!afs_remove_server_from_cell(server))
+ return;
- if (purging) {
- trace_afs_server(server->debug_id, refcount_read(&server->ref),
- active, afs_server_trace_purging);
- if (active != 0)
- pr_notice("Can't purge s=%08x\n", server->debug_id);
- }
+ timer_shutdown_sync(&server->timer);
+ cancel_work(&server->destroyer);
- if (active == 0) {
- time64_t expire_at = server->unuse_time;
-
- if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
- !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
- expire_at += afs_server_gc_delay;
- if (purging || expire_at <= now) {
- server->gc_next = gc_list;
- gc_list = server;
- } else if (expire_at < next_manage) {
- next_manage = expire_at;
- }
- }
- }
+ if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+ afs_give_up_callbacks(net, server);
- read_sequnlock_excl(&net->fs_lock);
+ /* Unbind the rxrpc_peer records from the server. */
+ estate = rcu_access_pointer(server->endpoint_state);
+ if (estate)
+ afs_set_peer_appdata(server, estate->addresses, NULL);
- /* Update the timer on the way out. We have to pass an increment on
- * servers_outstanding in the namespace that we are in to the timer or
- * the work scheduler.
- */
- if (!purging && next_manage < TIME64_MAX) {
- now = ktime_get_real_seconds();
+ write_seqlock(&net->fs_lock);
+ list_del_init(&server->probe_link);
+ if (!hlist_unhashed(&server->proc_link))
+ hlist_del_rcu(&server->proc_link);
+ write_sequnlock(&net->fs_lock);
- if (next_manage - now <= 0) {
- if (queue_work(afs_wq, &net->fs_manager))
- afs_inc_servers_outstanding(net);
- } else {
- afs_set_server_timer(net, next_manage - now);
- }
- }
+ afs_put_server(net, server, afs_server_trace_destroy);
+}
- afs_gc_servers(net, gc_list);
+static void afs_server_timer(struct timer_list *timer)
+{
+ struct afs_server *server = container_of(timer, struct afs_server, timer);
- afs_dec_servers_outstanding(net);
- _leave(" [%d]", atomic_read(&net->servers_outstanding));
+ afs_see_server(server, afs_server_trace_see_timer);
+ if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+ schedule_work(&server->destroyer);
}
-static void afs_queue_server_manager(struct afs_net *net)
+/*
+ * Wake up all the servers in a cell so that they can purge themselves.
+ */
+void afs_purge_servers(struct afs_cell *cell)
{
- afs_inc_servers_outstanding(net);
- if (!queue_work(afs_wq, &net->fs_manager))
- afs_dec_servers_outstanding(net);
+ struct afs_server *server;
+ struct rb_node *rb;
+
+ down_read(&cell->fs_lock);
+ for (rb = rb_first(&cell->fs_servers); rb; rb = rb_next(rb)) {
+ server = rb_entry(rb, struct afs_server, uuid_rb);
+ afs_see_server(server, afs_server_trace_see_purge);
+ schedule_work(&server->destroyer);
+ }
+ up_read(&cell->fs_lock);
}
/*
- * Purge list of servers.
+ * Wait for outstanding servers.
*/
-void afs_purge_servers(struct afs_net *net)
+void afs_wait_for_servers(struct afs_net *net)
{
_enter("");
- if (del_timer_sync(&net->fs_timer))
- afs_dec_servers_outstanding(net);
-
- afs_queue_server_manager(net);
-
- _debug("wait");
atomic_dec(&net->servers_outstanding);
wait_var_event(&net->servers_outstanding,
!atomic_read(&net->servers_outstanding));
@@ -636,9 +540,12 @@ void afs_purge_servers(struct afs_net *net)
* Get an update for a server's address list.
*/
static noinline bool afs_update_server_record(struct afs_operation *op,
- struct afs_server *server)
+ struct afs_server *server,
+ struct key *key)
{
- struct afs_addr_list *alist, *discard;
+ struct afs_endpoint_state *estate;
+ struct afs_addr_list *alist;
+ bool has_addrs;
_enter("");
@@ -646,31 +553,29 @@ static noinline bool afs_update_server_record(struct afs_operation *op,
atomic_read(&server->active),
afs_server_trace_update);
- alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid);
+ alist = afs_vl_lookup_addrs(server, op->key);
if (IS_ERR(alist)) {
+ rcu_read_lock();
+ estate = rcu_dereference(server->endpoint_state);
+ has_addrs = estate->addresses;
+ rcu_read_unlock();
+
if ((PTR_ERR(alist) == -ERESTARTSYS ||
PTR_ERR(alist) == -EINTR) &&
(op->flags & AFS_OPERATION_UNINTR) &&
- server->addresses) {
+ has_addrs) {
_leave(" = t [intr]");
return true;
}
- op->error = PTR_ERR(alist);
- _leave(" = f [%d]", op->error);
+ afs_op_set_error(op, PTR_ERR(alist));
+ _leave(" = f [%d]", afs_op_error(op));
return false;
}
- discard = alist;
- if (server->addr_version != alist->version) {
- write_lock(&server->fs_lock);
- discard = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->fs_lock));
- rcu_assign_pointer(server->addresses, alist);
- server->addr_version = alist->version;
- write_unlock(&server->fs_lock);
- }
+ if (server->addr_version != alist->version)
+ afs_fs_probe_fileserver(op->net, server, alist, key);
- afs_put_addrlist(discard);
+ afs_put_addrlist(alist, afs_alist_trace_put_server_update);
_leave(" = t");
return true;
}
@@ -678,7 +583,8 @@ static noinline bool afs_update_server_record(struct afs_operation *op,
/*
* See if a server's address list needs updating.
*/
-bool afs_check_server_record(struct afs_operation *op, struct afs_server *server)
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server,
+ struct key *key)
{
bool success;
int ret, retries = 0;
@@ -698,7 +604,7 @@ retry:
update:
if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
- success = afs_update_server_record(op, server);
+ success = afs_update_server_record(op, server, key);
clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
_leave(" = %d", success);
@@ -710,7 +616,7 @@ wait:
(op->flags & AFS_OPERATION_UNINTR) ?
TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
if (ret == -ERESTARTSYS) {
- op->error = ret;
+ afs_op_set_error(op, ret);
_leave(" = f [intr]");
return false;
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index ed9056703505..20d5474837df 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -16,43 +16,70 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
if (slist && refcount_dec_and_test(&slist->usage)) {
for (i = 0; i < slist->nr_servers; i++)
afs_unuse_server(net, slist->servers[i].server,
- afs_server_trace_put_slist);
- kfree(slist);
+ afs_server_trace_unuse_slist);
+ kfree_rcu(slist, rcu);
}
}
/*
* Build a server list from a VLDB record.
*/
-struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
struct key *key,
- struct afs_vldb_entry *vldb,
- u8 type_mask)
+ struct afs_vldb_entry *vldb)
{
struct afs_server_list *slist;
struct afs_server *server;
- int ret = -ENOMEM, nr_servers = 0, i, j;
-
- for (i = 0; i < vldb->nr_servers; i++)
- if (vldb->fs_mask[i] & type_mask)
- nr_servers++;
+ unsigned int type_mask = 1 << volume->type;
+ bool use_newrepsites = false;
+ int ret = -ENOMEM, nr_servers = 0, newrep = 0, i, j, usable = 0;
+
+ /* Work out if we're going to restrict to NEWREPSITE-marked servers or
+ * not. If at least one site is marked as NEWREPSITE, then it's likely
+ * that "vos release" is busy updating RO sites. We cut over from one
+ * to the other when >=50% of the sites have been updated. Sites that
+ * are in the process of being updated are marked DONTUSE.
+ */
+ for (i = 0; i < vldb->nr_servers; i++) {
+ if (!(vldb->fs_mask[i] & type_mask))
+ continue;
+ nr_servers++;
+ if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
+ continue;
+ usable++;
+ if (vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE)
+ newrep++;
+ }
slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
if (!slist)
goto error;
+ if (newrep) {
+ if (newrep < usable / 2) {
+ slist->ro_replicating = AFS_RO_REPLICATING_USE_OLD;
+ } else {
+ slist->ro_replicating = AFS_RO_REPLICATING_USE_NEW;
+ use_newrepsites = true;
+ }
+ }
+
refcount_set(&slist->usage, 1);
rwlock_init(&slist->lock);
- for (i = 0; i < AFS_MAXTYPES; i++)
- slist->vids[i] = vldb->vid[i];
-
/* Make sure a records exists for each server in the list. */
for (i = 0; i < vldb->nr_servers; i++) {
+ unsigned long se_flags = 0;
+ bool newrepsite = vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE;
+
if (!(vldb->fs_mask[i] & type_mask))
continue;
+ if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
+ __set_bit(AFS_SE_EXCLUDED, &se_flags);
+ if (newrep && (newrepsite ^ use_newrepsites))
+ __set_bit(AFS_SE_EXCLUDED, &se_flags);
- server = afs_lookup_server(cell, key, &vldb->fs_server[i],
+ server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i],
vldb->addr_version[i]);
if (IS_ERR(server)) {
ret = PTR_ERR(server);
@@ -70,8 +97,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
- afs_put_server(cell->net, server,
- afs_server_trace_put_slist_isort);
+ afs_unuse_server_notime(volume->cell->net, server,
+ afs_server_trace_unuse_slist_isort);
continue;
}
@@ -81,6 +108,9 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
}
slist->servers[j].server = server;
+ slist->servers[j].volume = volume;
+ slist->servers[j].flags = se_flags;
+ slist->servers[j].cb_expires_at = AFS_NO_CB_PROMISE;
slist->nr_servers++;
}
@@ -92,7 +122,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
return slist;
error_2:
- afs_put_serverlist(cell->net, slist);
+ afs_put_serverlist(volume->cell->net, slist);
error:
return ERR_PTR(ret);
}
@@ -103,27 +133,117 @@ error:
bool afs_annotate_server_list(struct afs_server_list *new,
struct afs_server_list *old)
{
- struct afs_server *cur;
- int i, j;
+ unsigned long mask = 1UL << AFS_SE_EXCLUDED;
+ int i;
- if (old->nr_servers != new->nr_servers)
+ if (old->nr_servers != new->nr_servers ||
+ old->ro_replicating != new->ro_replicating)
goto changed;
- for (i = 0; i < old->nr_servers; i++)
+ for (i = 0; i < old->nr_servers; i++) {
if (old->servers[i].server != new->servers[i].server)
goto changed;
-
+ if ((old->servers[i].flags & mask) != (new->servers[i].flags & mask))
+ goto changed;
+ }
return false;
-
changed:
- /* Maintain the same preferred server as before if possible. */
- cur = old->servers[old->preferred].server;
- for (j = 0; j < new->nr_servers; j++) {
- if (new->servers[j].server == cur) {
- new->preferred = j;
- break;
+ return true;
+}
+
+/*
+ * Attach a volume to the servers it is going to use.
+ */
+void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist)
+{
+ struct afs_server_entry *se, *pe;
+ struct afs_server *server;
+ struct list_head *p;
+ unsigned int i;
+
+ down_write(&volume->cell->vs_lock);
+
+ for (i = 0; i < slist->nr_servers; i++) {
+ se = &slist->servers[i];
+ server = se->server;
+
+ list_for_each(p, &server->volumes) {
+ pe = list_entry(p, struct afs_server_entry, slink);
+ if (volume->vid <= pe->volume->vid)
+ break;
}
+ list_add_tail(&se->slink, p);
}
- return true;
+ slist->attached = true;
+ up_write(&volume->cell->vs_lock);
+}
+
+/*
+ * Reattach a volume to the servers it is going to use when server list is
+ * replaced. We try to switch the attachment points to avoid rewalking the
+ * lists.
+ */
+void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new,
+ struct afs_server_list *old)
+{
+ unsigned int n = 0, o = 0;
+
+ down_write(&volume->cell->vs_lock);
+
+ while (n < new->nr_servers || o < old->nr_servers) {
+ struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL;
+ struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL;
+ struct afs_server_entry *s;
+ struct list_head *p;
+ int diff;
+
+ if (pn && po && pn->server == po->server) {
+ pn->cb_expires_at = po->cb_expires_at;
+ list_replace(&po->slink, &pn->slink);
+ n++;
+ o++;
+ continue;
+ }
+
+ if (pn && po)
+ diff = memcmp(&pn->server->uuid, &po->server->uuid,
+ sizeof(pn->server->uuid));
+ else
+ diff = pn ? -1 : 1;
+
+ if (diff < 0) {
+ list_for_each(p, &pn->server->volumes) {
+ s = list_entry(p, struct afs_server_entry, slink);
+ if (volume->vid <= s->volume->vid)
+ break;
+ }
+ list_add_tail(&pn->slink, p);
+ n++;
+ } else {
+ list_del(&po->slink);
+ o++;
+ }
+ }
+
+ up_write(&volume->cell->vs_lock);
+}
+
+/*
+ * Detach a volume from the servers it has been using.
+ */
+void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist)
+{
+ unsigned int i;
+
+ if (!slist->attached)
+ return;
+
+ down_write(&volume->cell->vs_lock);
+
+ for (i = 0; i < slist->nr_servers; i++)
+ list_del(&slist->servers[i].slink);
+
+ slist->attached = false;
+ up_write(&volume->cell->vs_lock);
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 95d713074dc8..d672b7ab57ae 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -55,7 +55,7 @@ int afs_net_id;
static const struct super_operations afs_super_ops = {
.statfs = afs_statfs,
.alloc_inode = afs_alloc_inode,
- .write_inode = afs_write_inode,
+ .write_inode = netfs_unpin_writeback,
.drop_inode = afs_drop_inode,
.destroy_inode = afs_destroy_inode,
.free_inode = afs_free_inode,
@@ -194,8 +194,6 @@ static int afs_show_options(struct seq_file *m, struct dentry *root)
if (as->dyn_root)
seq_puts(m, ",dyn");
- if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
- seq_puts(m, ",autocell");
switch (as->flock_mode) {
case afs_flock_mode_unset: break;
case afs_flock_mode_local: p = "local"; break;
@@ -292,13 +290,14 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
/* lookup the cell record */
if (cellname) {
cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
- NULL, false);
+ NULL, AFS_LOOKUP_CELL_DIRECT_MOUNT,
+ afs_cell_trace_use_lookup_mount);
if (IS_ERR(cell)) {
pr_err("kAFS: unable to lookup cell '%*.*s'\n",
cellnamesz, cellnamesz, cellname ?: "");
return PTR_ERR(cell);
}
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_parse);
afs_see_cell(cell, afs_cell_trace_see_source);
ctx->cell = cell;
}
@@ -381,8 +380,7 @@ static int afs_validate_fc(struct fs_context *fc)
ctx->key = key;
if (ctx->volume) {
- afs_put_volume(ctx->net, ctx->volume,
- afs_volume_trace_put_validate_fc);
+ afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc);
ctx->volume = NULL;
}
@@ -396,7 +394,7 @@ static int afs_validate_fc(struct fs_context *fc)
ctx->key = NULL;
cell = afs_use_cell(ctx->cell->alias_of,
afs_cell_trace_use_fc_alias);
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
ctx->cell = cell;
goto reget_key;
}
@@ -407,6 +405,10 @@ static int afs_validate_fc(struct fs_context *fc)
return PTR_ERR(volume);
ctx->volume = volume;
+ if (volume->type != AFSVL_RWVOL) {
+ ctx->flock_mode = afs_flock_mode_local;
+ fc->sb_flags |= SB_RDONLY;
+ }
}
return 0;
@@ -465,7 +467,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
/* allocate the root inode and dentry */
if (as->dyn_root) {
- inode = afs_iget_pseudo_dir(sb, true);
+ inode = afs_dynroot_iget_root(sb);
} else {
sprintf(sb->s_id, "%llu", as->volume->vid);
afs_activate_volume(as->volume);
@@ -475,21 +477,15 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
if (IS_ERR(inode))
return PTR_ERR(inode);
- if (ctx->autocell || as->dyn_root)
- set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
-
ret = -ENOMEM;
sb->s_root = d_make_root(inode);
if (!sb->s_root)
goto error;
if (as->dyn_root) {
- sb->s_d_op = &afs_dynroot_dentry_operations;
- ret = afs_dynroot_populate(sb);
- if (ret < 0)
- goto error;
+ set_default_d_op(sb, &afs_dynroot_dentry_operations);
} else {
- sb->s_d_op = &afs_fs_dentry_operations;
+ set_default_d_op(sb, &afs_fs_dentry_operations);
rcu_assign_pointer(as->volume->sb, sb);
}
@@ -524,9 +520,8 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
static void afs_destroy_sbi(struct afs_super_info *as)
{
if (as) {
- struct afs_net *net = afs_net(as->net_ns);
- afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi);
- afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi);
+ afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi);
+ afs_unuse_cell(as->cell, afs_cell_trace_unuse_sbi);
put_net(as->net_ns);
kfree(as);
}
@@ -536,9 +531,6 @@ static void afs_kill_super(struct super_block *sb)
{
struct afs_super_info *as = AFS_FS_S(sb);
- if (as->dyn_root)
- afs_dynroot_depopulate(sb);
-
/* Clear the callback interests (which will do ilookup5) before
* deactivating the superblock.
*/
@@ -611,8 +603,8 @@ static void afs_free_fc(struct fs_context *fc)
struct afs_fs_context *ctx = fc->fs_private;
afs_destroy_sbi(fc->s_fs_info);
- afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc);
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
+ afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
key_put(ctx->key);
kfree(ctx);
}
@@ -660,7 +652,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->netfs.inode);
- mutex_init(&vnode->io_lock);
+ INIT_LIST_HEAD(&vnode->io_lock_waiters);
init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
@@ -693,6 +685,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
vnode->volume = NULL;
vnode->lock_key = NULL;
vnode->permit_cache = NULL;
+ vnode->directory = NULL;
+ vnode->directory_size = 0;
vnode->flags = 1 << AFS_VNODE_UNSET;
vnode->lock_state = AFS_VNODE_LOCK_NONE;
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
new file mode 100644
index 000000000000..0ba8336c9025
--- /dev/null
+++ b/fs/afs/validation.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* vnode and volume validity verification.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+/*
+ * Data validation is managed through a number of mechanisms from the server:
+ *
+ * (1) On first contact with a server (such as if it has just been rebooted),
+ * the server sends us a CB.InitCallBackState* request.
+ *
+ * (2) On a RW volume, in response to certain vnode (inode)-accessing RPC
+ * calls, the server maintains a time-limited per-vnode promise that it
+ * will send us a CB.CallBack request if a third party alters the vnodes
+ * accessed.
+ *
+ * Note that a vnode-level callbacks may also be sent for other reasons,
+ * such as filelock release.
+ *
+ * (3) On a RO (or Backup) volume, in response to certain vnode-accessing RPC
+ * calls, each server maintains a time-limited per-volume promise that it
+ * will send us a CB.CallBack request if the RO volume is updated to a
+ * snapshot of the RW volume ("vos release"). This is an atomic event
+ * that cuts over all instances of the RO volume across multiple servers
+ * simultaneously.
+ *
+ * Note that a volume-level callbacks may also be sent for other reasons,
+ * such as the volumeserver taking over control of the volume from the
+ * fileserver.
+ *
+ * Note also that each server maintains an independent time limit on an
+ * independent callback.
+ *
+ * (4) Certain RPC calls include a volume information record "VolSync" in
+ * their reply. This contains a creation date for the volume that should
+ * remain unchanged for a RW volume (but will be changed if the volume is
+ * restored from backup) or will be bumped to the time of snapshotting
+ * when a RO volume is released.
+ *
+ * In order to track this events, the following are provided:
+ *
+ * ->cb_v_break. A counter of events that might mean that the contents of
+ * a volume have been altered since we last checked a vnode.
+ *
+ * ->cb_v_check. A counter of the number of events that we've sent a
+ * query to the server for. Everything's up to date if this equals
+ * cb_v_break.
+ *
+ * ->cb_scrub. A counter of the number of regression events for which we
+ * have to completely wipe the cache.
+ *
+ * ->cb_ro_snapshot. A counter of the number of times that we've
+ * recognised that a RO volume has been updated.
+ *
+ * ->cb_break. A counter of events that might mean that the contents of a
+ * vnode have been altered.
+ *
+ * ->cb_expires_at. The time at which the callback promise expires or
+ * AFS_NO_CB_PROMISE if we have no promise.
+ *
+ * The way we manage things is:
+ *
+ * (1) When a volume-level CB.CallBack occurs, we increment ->cb_v_break on
+ * the volume and reset ->cb_expires_at (ie. set AFS_NO_CB_PROMISE) on the
+ * volume and volume's server record.
+ *
+ * (2) When a CB.InitCallBackState occurs, we treat this as a volume-level
+ * callback break on all the volumes that have been using that volume
+ * (ie. increment ->cb_v_break and reset ->cb_expires_at).
+ *
+ * (3) When a vnode-level CB.CallBack occurs, we increment ->cb_break on the
+ * vnode and reset its ->cb_expires_at. If the vnode is mmapped, we also
+ * dispatch a work item to unmap all PTEs to the vnode's pagecache to
+ * force reentry to the filesystem for revalidation.
+ *
+ * (4) When entering the filesystem, we call afs_validate() to check the
+ * validity of a vnode. This first checks to see if ->cb_v_check and
+ * ->cb_v_break match, and if they don't, we lock volume->cb_check_lock
+ * exclusively and perform an FS.FetchStatus on the vnode.
+ *
+ * After checking the volume, we check the vnode. If there's a mismatch
+ * between the volume counters and the vnode's mirrors of those counters,
+ * we lock vnode->validate_lock and issue an FS.FetchStatus on the vnode.
+ *
+ * (5) When the reply from FS.FetchStatus arrives, the VolSync record is
+ * parsed:
+ *
+ * (A) If the Creation timestamp has changed on a RW volume or regressed
+ * on a RO volume, we try to increment ->cb_scrub; if it advances on a
+ * RO volume, we assume "vos release" happened and try to increment
+ * ->cb_ro_snapshot.
+ *
+ * (B) If the Update timestamp has regressed, we try to increment
+ * ->cb_scrub.
+ *
+ * Note that in both of these cases, we only do the increment if we can
+ * cmpxchg the value of the timestamp from the value we noted before the
+ * op. This tries to prevent parallel ops from fighting one another.
+ *
+ * volume->cb_v_check is then set to ->cb_v_break.
+ *
+ * (6) The AFSCallBack record included in the FS.FetchStatus reply is also
+ * parsed and used to set the promise in ->cb_expires_at for the vnode,
+ * the volume and the volume's server record.
+ *
+ * (7) If ->cb_scrub is seen to have advanced, we invalidate the pagecache for
+ * the vnode.
+ */
+
+/*
+ * Check the validity of a vnode/inode and its parent volume.
+ */
+bool afs_check_validity(const struct afs_vnode *vnode)
+{
+ const struct afs_volume *volume = vnode->volume;
+ enum afs_vnode_invalid_trace trace = afs_vnode_valid_trace;
+ time64_t cb_expires_at = atomic64_read(&vnode->cb_expires_at);
+ time64_t deadline = ktime_get_real_seconds() + 10;
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ return true;
+
+ if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break))
+ trace = afs_vnode_invalid_trace_cb_v_break;
+ else if (cb_expires_at == AFS_NO_CB_PROMISE)
+ trace = afs_vnode_invalid_trace_no_cb_promise;
+ else if (cb_expires_at <= deadline)
+ trace = afs_vnode_invalid_trace_expired;
+ else if (volume->cb_expires_at <= deadline)
+ trace = afs_vnode_invalid_trace_vol_expired;
+ else if (vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot))
+ trace = afs_vnode_invalid_trace_cb_ro_snapshot;
+ else if (vnode->cb_scrub != atomic_read(&volume->cb_scrub))
+ trace = afs_vnode_invalid_trace_cb_scrub;
+ else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ trace = afs_vnode_invalid_trace_zap_data;
+ else
+ return true;
+ trace_afs_vnode_invalid(vnode, trace);
+ return false;
+}
+
+/*
+ * See if the server we've just talked to is currently excluded.
+ */
+static bool __afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
+{
+ const struct afs_server_entry *se;
+ const struct afs_server_list *slist;
+ bool is_excluded = true;
+ int i;
+
+ rcu_read_lock();
+
+ slist = rcu_dereference(volume->servers);
+ for (i = 0; i < slist->nr_servers; i++) {
+ se = &slist->servers[i];
+ if (op->server == se->server) {
+ is_excluded = test_bit(AFS_SE_EXCLUDED, &se->flags);
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ return is_excluded;
+}
+
+/*
+ * Update the volume's server list when the creation time changes and see if
+ * the server we've just talked to is currently excluded.
+ */
+static int afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
+{
+ int ret;
+
+ if (__afs_is_server_excluded(op, volume))
+ return 1;
+
+ set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+ ret = afs_check_volume_status(op->volume, op);
+ if (ret < 0)
+ return ret;
+
+ return __afs_is_server_excluded(op, volume);
+}
+
+/*
+ * Handle a change to the volume creation time in the VolSync record.
+ */
+static int afs_update_volume_creation_time(struct afs_operation *op, struct afs_volume *volume)
+{
+ unsigned int snap;
+ time64_t cur = volume->creation_time;
+ time64_t old = op->pre_volsync.creation;
+ time64_t new = op->volsync.creation;
+ int ret;
+
+ _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new);
+
+ if (cur == TIME64_MIN) {
+ volume->creation_time = new;
+ return 0;
+ }
+
+ if (new == cur)
+ return 0;
+
+ /* Try to advance the creation timestamp from what we had before the
+ * operation to what we got back from the server. This should
+ * hopefully ensure that in a race between multiple operations only one
+ * of them will do this.
+ */
+ if (cur != old)
+ return 0;
+
+ /* If the creation time changes in an unexpected way, we need to scrub
+ * our caches. For a RW vol, this will only change if the volume is
+ * restored from a backup; for a RO/Backup vol, this will advance when
+ * the volume is updated to a new snapshot (eg. "vos release").
+ */
+ if (volume->type == AFSVL_RWVOL)
+ goto regressed;
+ if (volume->type == AFSVL_BACKVOL) {
+ if (new < old)
+ goto regressed;
+ goto advance;
+ }
+
+ /* We have an RO volume, we need to query the VL server and look at the
+ * server flags to see if RW->RO replication is in progress.
+ */
+ ret = afs_is_server_excluded(op, volume);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ snap = atomic_read(&volume->cb_ro_snapshot);
+ trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_volume_excluded);
+ return ret;
+ }
+
+advance:
+ snap = atomic_inc_return(&volume->cb_ro_snapshot);
+ trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_for_vos_release);
+ volume->creation_time = new;
+ return 0;
+
+regressed:
+ atomic_inc(&volume->cb_scrub);
+ trace_afs_cb_v_break(volume->vid, 0, afs_cb_break_for_creation_regress);
+ volume->creation_time = new;
+ return 0;
+}
+
+/*
+ * Handle a change to the volume update time in the VolSync record.
+ */
+static void afs_update_volume_update_time(struct afs_operation *op, struct afs_volume *volume)
+{
+ enum afs_cb_break_reason reason = afs_cb_break_no_break;
+ time64_t cur = volume->update_time;
+ time64_t old = op->pre_volsync.update;
+ time64_t new = op->volsync.update;
+
+ _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new);
+
+ if (cur == TIME64_MIN) {
+ volume->update_time = new;
+ return;
+ }
+
+ if (new == cur)
+ return;
+
+ /* If the volume update time changes in an unexpected way, we need to
+ * scrub our caches. For a RW vol, this will advance on every
+ * modification op; for a RO/Backup vol, this will advance when the
+ * volume is updated to a new snapshot (eg. "vos release").
+ */
+ if (new < old)
+ reason = afs_cb_break_for_update_regress;
+
+ /* Try to advance the update timestamp from what we had before the
+ * operation to what we got back from the server. This should
+ * hopefully ensure that in a race between multiple operations only one
+ * of them will do this.
+ */
+ if (cur == old) {
+ if (reason == afs_cb_break_for_update_regress) {
+ atomic_inc(&volume->cb_scrub);
+ trace_afs_cb_v_break(volume->vid, 0, reason);
+ }
+ volume->update_time = new;
+ }
+}
+
+static int afs_update_volume_times(struct afs_operation *op, struct afs_volume *volume)
+{
+ int ret = 0;
+
+ if (likely(op->volsync.creation == volume->creation_time &&
+ op->volsync.update == volume->update_time))
+ return 0;
+
+ mutex_lock(&volume->volsync_lock);
+ if (op->volsync.creation != volume->creation_time) {
+ ret = afs_update_volume_creation_time(op, volume);
+ if (ret < 0)
+ goto out;
+ }
+ if (op->volsync.update != volume->update_time)
+ afs_update_volume_update_time(op, volume);
+out:
+ mutex_unlock(&volume->volsync_lock);
+ return ret;
+}
+
+/*
+ * Update the state of a volume, including recording the expiration time of the
+ * callback promise. Returns 1 to redo the operation from the start.
+ */
+int afs_update_volume_state(struct afs_operation *op)
+{
+ struct afs_server_list *slist = op->server_list;
+ struct afs_server_entry *se = &slist->servers[op->server_index];
+ struct afs_callback *cb = &op->file[0].scb.callback;
+ struct afs_volume *volume = op->volume;
+ unsigned int cb_v_break = atomic_read(&volume->cb_v_break);
+ unsigned int cb_v_check = atomic_read(&volume->cb_v_check);
+ int ret;
+
+ _enter("%llx", op->volume->vid);
+
+ if (op->volsync.creation != TIME64_MIN || op->volsync.update != TIME64_MIN) {
+ ret = afs_update_volume_times(op, volume);
+ if (ret != 0) {
+ _leave(" = %d", ret);
+ return ret;
+ }
+ }
+
+ if (op->cb_v_break == cb_v_break &&
+ (op->file[0].scb.have_cb || op->file[1].scb.have_cb)) {
+ time64_t expires_at = cb->expires_at;
+
+ if (!op->file[0].scb.have_cb)
+ expires_at = op->file[1].scb.callback.expires_at;
+
+ se->cb_expires_at = expires_at;
+ volume->cb_expires_at = expires_at;
+ }
+ if (cb_v_check < op->cb_v_break)
+ atomic_cmpxchg(&volume->cb_v_check, cb_v_check, op->cb_v_break);
+ return 0;
+}
+
+/*
+ * mark the data attached to an inode as obsolete due to a write on the server
+ * - might also want to ditch all the outstanding writes and dirty pages
+ */
+static void afs_zap_data(struct afs_vnode *vnode)
+{
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+ afs_invalidate_cache(vnode, 0);
+
+ /* nuke all the non-dirty pages that aren't locked, mapped or being
+ * written back in a regular file and completely discard the pages in a
+ * directory or symlink */
+ if (S_ISREG(vnode->netfs.inode.i_mode))
+ filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX);
+ else
+ filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX);
+}
+
+/*
+ * validate a vnode/inode
+ * - there are several things we need to check
+ * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ * symlink)
+ * - parent dir metadata changed (security changes)
+ * - dentry data changed (write, truncate)
+ * - dentry metadata changed (security changes)
+ */
+int afs_validate(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_volume *volume = vnode->volume;
+ unsigned int cb_ro_snapshot, cb_scrub;
+ time64_t deadline = ktime_get_real_seconds() + 10;
+ bool zap = false, locked_vol = false;
+ int ret;
+
+ _enter("{v={%llx:%llu} fl=%lx},%x",
+ vnode->fid.vid, vnode->fid.vnode, vnode->flags,
+ key_serial(key));
+
+ if (afs_check_validity(vnode))
+ return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
+
+ ret = down_write_killable(&vnode->validate_lock);
+ if (ret < 0)
+ goto error;
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ ret = -ESTALE;
+ goto error_unlock;
+ }
+
+ /* Validate a volume after the v_break has changed or the volume
+ * callback expired. We only want to do this once per volume per
+ * v_break change. The actual work will be done when parsing the
+ * status fetch reply.
+ */
+ if (volume->cb_expires_at <= deadline ||
+ atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break)) {
+ ret = mutex_lock_interruptible(&volume->cb_check_lock);
+ if (ret < 0)
+ goto error_unlock;
+ locked_vol = true;
+ }
+
+ cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot);
+ cb_scrub = atomic_read(&volume->cb_scrub);
+ if (vnode->cb_ro_snapshot != cb_ro_snapshot ||
+ vnode->cb_scrub != cb_scrub)
+ unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
+
+ if (vnode->cb_ro_snapshot != cb_ro_snapshot ||
+ vnode->cb_scrub != cb_scrub ||
+ volume->cb_expires_at <= deadline ||
+ atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
+ atomic64_read(&vnode->cb_expires_at) <= deadline
+ ) {
+ ret = afs_fetch_status(vnode, key, false, NULL);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ ret = -ESTALE;
+ }
+ goto error_unlock;
+ }
+
+ _debug("new promise [fl=%lx]", vnode->flags);
+ }
+
+ /* We can drop the volume lock now as. */
+ if (locked_vol) {
+ mutex_unlock(&volume->cb_check_lock);
+ locked_vol = false;
+ }
+
+ cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot);
+ cb_scrub = atomic_read(&volume->cb_scrub);
+ _debug("vnode inval %x==%x %x==%x",
+ vnode->cb_ro_snapshot, cb_ro_snapshot,
+ vnode->cb_scrub, cb_scrub);
+ if (vnode->cb_scrub != cb_scrub)
+ zap = true;
+ vnode->cb_ro_snapshot = cb_ro_snapshot;
+ vnode->cb_scrub = cb_scrub;
+
+ /* if the vnode's data version number changed then its contents are
+ * different */
+ zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+ if (zap)
+ afs_zap_data(vnode);
+ up_write(&vnode->validate_lock);
+ _leave(" = 0");
+ return 0;
+
+error_unlock:
+ if (locked_vol)
+ mutex_unlock(&volume->cb_check_lock);
+ up_write(&vnode->validate_lock);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index f04a80e4f5c3..fc9676abd252 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -33,55 +33,6 @@ static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *k
}
/*
- * Compare two addresses.
- */
-static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a,
- const struct sockaddr_rxrpc *srx_b)
-{
- short port_a, port_b;
- int addr_a, addr_b, diff;
-
- diff = (short)srx_a->transport_type - (short)srx_b->transport_type;
- if (diff)
- goto out;
-
- switch (srx_a->transport_type) {
- case AF_INET: {
- const struct sockaddr_in *a = &srx_a->transport.sin;
- const struct sockaddr_in *b = &srx_b->transport.sin;
- addr_a = ntohl(a->sin_addr.s_addr);
- addr_b = ntohl(b->sin_addr.s_addr);
- diff = addr_a - addr_b;
- if (diff == 0) {
- port_a = ntohs(a->sin_port);
- port_b = ntohs(b->sin_port);
- diff = port_a - port_b;
- }
- break;
- }
-
- case AF_INET6: {
- const struct sockaddr_in6 *a = &srx_a->transport.sin6;
- const struct sockaddr_in6 *b = &srx_b->transport.sin6;
- diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16);
- if (diff == 0) {
- port_a = ntohs(a->sin6_port);
- port_b = ntohs(b->sin6_port);
- diff = port_a - port_b;
- }
- break;
- }
-
- default:
- WARN_ON(1);
- diff = 1;
- }
-
-out:
- return diff;
-}
-
-/*
* Compare the address lists of a pair of fileservers.
*/
static int afs_compare_fs_alists(const struct afs_server *server_a,
@@ -90,13 +41,13 @@ static int afs_compare_fs_alists(const struct afs_server *server_a,
const struct afs_addr_list *la, *lb;
int a = 0, b = 0, addr_matches = 0;
- la = rcu_dereference(server_a->addresses);
- lb = rcu_dereference(server_b->addresses);
+ la = rcu_dereference(server_a->endpoint_state)->addresses;
+ lb = rcu_dereference(server_b->endpoint_state)->addresses;
while (a < la->nr_addrs && b < lb->nr_addrs) {
- const struct sockaddr_rxrpc *srx_a = &la->addrs[a];
- const struct sockaddr_rxrpc *srx_b = &lb->addrs[b];
- int diff = afs_compare_addrs(srx_a, srx_b);
+ unsigned long pa = (unsigned long)la->addrs[a].peer;
+ unsigned long pb = (unsigned long)lb->addrs[b].peer;
+ long diff = pa - pb;
if (diff < 0) {
a++;
@@ -126,7 +77,7 @@ static int afs_compare_volume_slists(const struct afs_volume *vol_a,
lb = rcu_dereference(vol_b->servers);
for (i = 0; i < AFS_MAXTYPES; i++)
- if (la->vids[i] != lb->vids[i])
+ if (vol_a->vids[i] != vol_b->vids[i])
return 0;
while (a < la->nr_servers && b < lb->nr_servers) {
@@ -205,7 +156,7 @@ static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
/* And see if it's in the new cell. */
volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len);
if (IS_ERR(volume)) {
- afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias);
+ afs_put_volume(pvol, afs_volume_trace_put_query_alias);
if (PTR_ERR(volume) != -ENOMEDIUM)
return PTR_ERR(volume);
/* That volume is not in the new cell, so not an alias */
@@ -223,8 +174,8 @@ static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
rcu_read_unlock();
}
- afs_put_volume(cell->net, volume, afs_volume_trace_put_query_alias);
- afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias);
+ afs_put_volume(volume, afs_volume_trace_put_query_alias);
+ afs_put_volume(pvol, afs_volume_trace_put_query_alias);
return ret;
}
@@ -254,11 +205,11 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
goto is_alias;
if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
- afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+ afs_unuse_cell(p, afs_cell_trace_unuse_check_alias);
return -ERESTARTSYS;
}
- afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+ afs_unuse_cell(p, afs_cell_trace_unuse_check_alias);
}
mutex_unlock(&cell->net->proc_cells_lock);
@@ -285,7 +236,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
while (afs_select_vlserver(&vc)) {
if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) {
- vc.ac.error = -EOPNOTSUPP;
+ vc.call_error = -EOPNOTSUPP;
skipped = true;
continue;
}
@@ -302,6 +253,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
{
struct afs_cell *master;
+ size_t name_len;
char *cell_name;
cell_name = afs_vl_get_cell_name(cell, key);
@@ -313,8 +265,13 @@ static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
return 0;
}
- master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name),
- NULL, false);
+ name_len = strlen(cell_name);
+ if (!name_len || name_len > AFS_MAXCELLNAME)
+ master = ERR_PTR(-EOPNOTSUPP);
+ else
+ master = afs_lookup_cell(cell->net, cell_name, name_len, NULL,
+ AFS_LOOKUP_CELL_ALIAS_CHECK,
+ afs_cell_trace_use_lookup_canonical);
kfree(cell_name);
if (IS_ERR(master))
return PTR_ERR(master);
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index acc48216136a..9b1c20daac53 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -13,6 +13,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
unsigned short port)
{
struct afs_vlserver *vlserver;
+ static atomic_t debug_ids;
vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
GFP_KERNEL);
@@ -21,8 +22,10 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
rwlock_init(&vlserver->lock);
init_waitqueue_head(&vlserver->probe_wq);
spin_lock_init(&vlserver->probe_lock);
+ vlserver->debug_id = atomic_inc_return(&debug_ids);
vlserver->rtt = UINT_MAX;
vlserver->name_len = name_len;
+ vlserver->service_id = VL_SERVICE;
vlserver->port = port;
memcpy(vlserver->name, name, name_len);
}
@@ -33,7 +36,8 @@ static void afs_vlserver_rcu(struct rcu_head *rcu)
{
struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
- afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
+ afs_put_addrlist(rcu_access_pointer(vlserver->addresses),
+ afs_alist_trace_put_vlserver);
kfree_rcu(vlserver, rcu);
}
@@ -83,14 +87,15 @@ static u16 afs_extract_le16(const u8 **_b)
/*
* Build a VL server address list from a DNS queried server list.
*/
-static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+static struct afs_addr_list *afs_extract_vl_addrs(struct afs_net *net,
+ const u8 **_b, const u8 *end,
u8 nr_addrs, u16 port)
{
struct afs_addr_list *alist;
const u8 *b = *_b;
int ret = -EINVAL;
- alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+ alist = afs_alloc_addrlist(nr_addrs);
if (!alist)
return ERR_PTR(-ENOMEM);
if (nr_addrs == 0)
@@ -109,7 +114,9 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
goto error;
}
memcpy(x, b, 4);
- afs_merge_fs_addr4(alist, x[0], port);
+ ret = afs_merge_fs_addr4(net, alist, x[0], port);
+ if (ret < 0)
+ goto error;
b += 4;
break;
@@ -119,7 +126,9 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
goto error;
}
memcpy(x, b, 16);
- afs_merge_fs_addr6(alist, x, port);
+ ret = afs_merge_fs_addr6(net, alist, x, port);
+ if (ret < 0)
+ goto error;
b += 16;
break;
@@ -140,7 +149,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
error:
*_b = b;
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
return ERR_PTR(ret);
}
@@ -247,7 +256,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
/* Extract the addresses - note that we can't skip this as we
* have to advance the payload pointer.
*/
- addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
+ addrs = afs_extract_vl_addrs(cell->net, &b, end, bs.nr_addrs, bs.port);
if (IS_ERR(addrs)) {
ret = PTR_ERR(addrs);
goto error_2;
@@ -255,7 +264,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
if (vllist->nr_servers >= nr_servers) {
_debug("skip %u >= %u", vllist->nr_servers, nr_servers);
- afs_put_addrlist(addrs);
+ afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty);
afs_put_vlserver(cell->net, server);
continue;
}
@@ -264,7 +273,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
addrs->status = bs.status;
if (addrs->nr_addrs == 0) {
- afs_put_addrlist(addrs);
+ afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty);
if (!rcu_access_pointer(server->addresses)) {
afs_put_vlserver(cell->net, server);
continue;
@@ -276,7 +285,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
old = rcu_replace_pointer(server->addresses, old,
lockdep_is_held(&server->lock));
write_unlock(&server->lock);
- afs_put_addrlist(old);
+ afs_put_addrlist(old, afs_alist_trace_put_vlserver_old);
}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index d1c7068b4346..3d2e0c925460 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -46,11 +46,12 @@ static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up)
*/
void afs_vlserver_probe_result(struct afs_call *call)
{
- struct afs_addr_list *alist = call->alist;
+ struct afs_addr_list *alist = call->vl_probe;
struct afs_vlserver *server = call->vlserver;
+ struct afs_address *addr = &alist->addrs[call->probe_index];
unsigned int server_index = call->server_index;
unsigned int rtt_us = 0;
- unsigned int index = call->addr_ix;
+ unsigned int index = call->probe_index;
bool have_result = false;
int ret = call->error;
@@ -89,7 +90,7 @@ void afs_vlserver_probe_result(struct afs_call *call)
case -ETIME:
default:
clear_bit(index, &alist->responded);
- set_bit(index, &alist->failed);
+ set_bit(index, &alist->probe_failed);
if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) &&
(server->probe.error == 0 ||
server->probe.error == -ETIMEDOUT ||
@@ -101,22 +102,22 @@ void afs_vlserver_probe_result(struct afs_call *call)
responded:
set_bit(index, &alist->responded);
- clear_bit(index, &alist->failed);
+ clear_bit(index, &alist->probe_failed);
if (call->service_id == YFS_VL_SERVICE) {
server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS;
set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
} else {
server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS;
if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) {
clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
}
}
- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
- rtt_us < server->probe.rtt) {
+ rtt_us = rxrpc_kernel_get_srtt(addr->peer);
+ if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
@@ -130,8 +131,10 @@ responded:
out:
spin_unlock(&server->probe_lock);
- _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
- server_index, index, &alist->addrs[index].transport, rtt_us, ret);
+ trace_afs_vl_probe(server, false, alist, index, call->error, call->abort_code, rtt_us);
+ _debug("probe [%u][%u] %pISpc rtt=%d ret=%d",
+ server_index, index, rxrpc_kernel_remote_addr(addr->peer),
+ rtt_us, ret);
afs_done_one_vl_probe(server, have_result);
}
@@ -146,35 +149,52 @@ static bool afs_do_probe_vlserver(struct afs_net *net,
unsigned int server_index,
struct afs_error *_e)
{
- struct afs_addr_cursor ac = {
- .index = 0,
- };
+ struct afs_addr_list *alist;
struct afs_call *call;
+ unsigned long unprobed;
+ unsigned int index, i;
bool in_progress = false;
+ int best_prio;
_enter("%s", server->name);
read_lock(&server->lock);
- ac.alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->lock));
+ alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->lock));
+ afs_get_addrlist(alist, afs_alist_trace_get_vlprobe);
read_unlock(&server->lock);
- atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+ atomic_set(&server->probe_outstanding, alist->nr_addrs);
memset(&server->probe, 0, sizeof(server->probe));
server->probe.rtt = UINT_MAX;
- for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
- call = afs_vl_get_capabilities(net, &ac, key, server,
+ unprobed = (1UL << alist->nr_addrs) - 1;
+ while (unprobed) {
+ best_prio = -1;
+ index = 0;
+ for (i = 0; i < alist->nr_addrs; i++) {
+ if (test_bit(i, &unprobed) &&
+ alist->addrs[i].prio > best_prio) {
+ index = i;
+ best_prio = alist->addrs[i].prio;
+ }
+ }
+ __clear_bit(index, &unprobed);
+
+ trace_afs_vl_probe(server, true, alist, index, 0, 0, 0);
+ call = afs_vl_get_capabilities(net, alist, index, key, server,
server_index);
if (!IS_ERR(call)) {
+ afs_prioritise_error(_e, call->error, call->abort_code);
afs_put_call(call);
in_progress = true;
} else {
- afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
+ afs_prioritise_error(_e, PTR_ERR(call), 0);
afs_done_one_vl_probe(server, false);
}
}
+ afs_put_addrlist(alist, afs_alist_trace_put_vlprobe);
return in_progress;
}
@@ -185,12 +205,10 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
struct afs_vlserver_list *vllist)
{
struct afs_vlserver *server;
- struct afs_error e;
+ struct afs_error e = {};
bool in_progress = false;
int i;
- e.error = 0;
- e.responded = false;
for (i = 0; i < vllist->nr_servers; i++) {
server = vllist->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index 488e58490b16..6ad9688d8f4b 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -17,18 +17,21 @@
bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
struct key *key)
{
+ static atomic_t debug_ids;
+
memset(vc, 0, sizeof(*vc));
vc->cell = cell;
vc->key = key;
- vc->error = -EDESTADDRREQ;
- vc->ac.error = SHRT_MAX;
+ vc->cumul_error.error = -EDESTADDRREQ;
+ vc->nr_iterations = -1;
if (signal_pending(current)) {
- vc->error = -EINTR;
+ vc->cumul_error.error = -EINTR;
vc->flags |= AFS_VL_CURSOR_STOP;
return false;
}
+ vc->debug_id = atomic_inc_return(&debug_ids);
return true;
}
@@ -45,21 +48,27 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
cell->dns_expiry <= ktime_get_real_seconds()) {
dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
- afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
+ afs_queue_cell(cell, afs_cell_trace_queue_dns);
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
if (wait_var_event_interruptible(
&cell->dns_lookup_count,
smp_load_acquire(&cell->dns_lookup_count)
!= dns_lookup_count) < 0) {
- vc->error = -ERESTARTSYS;
+ vc->cumul_error.error = -ERESTARTSYS;
return false;
}
}
/* Status load is ordered after lookup counter load */
+ if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
+ pr_warn("No record of cell %s\n", cell->name);
+ vc->cumul_error.error = -ENOENT;
+ return false;
+ }
+
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
- vc->error = -EDESTADDRREQ;
+ vc->cumul_error.error = -EDESTADDRREQ;
return false;
}
}
@@ -72,8 +81,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
if (!vc->server_list->nr_servers)
return false;
- vc->untried = (1UL << vc->server_list->nr_servers) - 1;
- vc->index = -1;
+ vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
+ vc->server_index = -1;
return true;
}
@@ -83,54 +92,57 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
*/
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
- struct afs_addr_list *alist;
+ struct afs_addr_list *alist = vc->alist;
struct afs_vlserver *vlserver;
- struct afs_error e;
- u32 rtt;
- int error = vc->ac.error, i;
+ unsigned long set, failed;
+ unsigned int rtt;
+ s32 abort_code = vc->call_abort_code;
+ int error = vc->call_error, i;
- _enter("%lx[%d],%lx[%d],%d,%d",
- vc->untried, vc->index,
- vc->ac.tried, vc->ac.index,
- error, vc->ac.abort_code);
+ vc->nr_iterations++;
+
+ _enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
+ vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
+ vc->addr_index, vc->addr_tried,
+ error, abort_code);
if (vc->flags & AFS_VL_CURSOR_STOP) {
_leave(" = f [stopped]");
return false;
}
- vc->nr_iterations++;
+ if (vc->nr_iterations == 0)
+ goto start;
+
+ WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);
/* Evaluate the result of the previous operation, if there was one. */
switch (error) {
- case SHRT_MAX:
- goto start;
-
default:
case 0:
/* Success or local failure. Stop. */
- vc->error = error;
+ vc->cumul_error.error = error;
vc->flags |= AFS_VL_CURSOR_STOP;
- _leave(" = f [okay/local %d]", vc->ac.error);
+ _leave(" = f [okay/local %d]", vc->cumul_error.error);
return false;
case -ECONNABORTED:
/* The far side rejected the operation on some grounds. This
* might involve the server being busy or the volume having been moved.
*/
- switch (vc->ac.abort_code) {
+ switch (abort_code) {
case AFSVL_IO:
case AFSVL_BADVOLOPER:
case AFSVL_NOMEM:
/* The server went weird. */
- vc->error = -EREMOTEIO;
+ afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
//write_lock(&vc->cell->vl_servers_lock);
- //vc->server_list->weird_mask |= 1 << vc->index;
+ //vc->server_list->weird_mask |= 1 << vc->server_index;
//write_unlock(&vc->cell->vl_servers_lock);
goto next_server;
default:
- vc->error = afs_abort_to_error(vc->ac.abort_code);
+ afs_prioritise_error(&vc->cumul_error, error, abort_code);
goto failed;
}
@@ -143,12 +155,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
case -ETIMEDOUT:
case -ETIME:
_debug("no conn %d", error);
- vc->error = error;
+ afs_prioritise_error(&vc->cumul_error, error, 0);
goto iterate_address;
case -ECONNRESET:
_debug("call reset");
- vc->error = error;
+ afs_prioritise_error(&vc->cumul_error, error, 0);
vc->flags |= AFS_VL_CURSOR_RETRY;
goto next_server;
@@ -159,7 +171,13 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
restart_from_beginning:
_debug("restart");
- afs_end_cursor(&vc->ac);
+ if (vc->call_responded &&
+ vc->addr_index != vc->alist->preferred &&
+ test_bit(alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(alist->preferred, vc->addr_index);
+ afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
+ alist = vc->alist = NULL;
+
afs_put_vlserverlist(vc->cell->net, vc->server_list);
vc->server_list = NULL;
if (vc->flags & AFS_VL_CURSOR_RETRIED)
@@ -167,53 +185,58 @@ restart_from_beginning:
vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
_debug("start");
+ ASSERTCMP(alist, ==, NULL);
if (!afs_start_vl_iteration(vc))
goto failed;
error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
- if (error < 0)
- goto failed_set_error;
+ if (error < 0) {
+ afs_prioritise_error(&vc->cumul_error, error, 0);
+ goto failed;
+ }
pick_server:
- _debug("pick [%lx]", vc->untried);
+ _debug("pick [%lx]", vc->untried_servers);
+ ASSERTCMP(alist, ==, NULL);
- error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
- if (error < 0)
- goto failed_set_error;
+ error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
+ if (error < 0) {
+ afs_prioritise_error(&vc->cumul_error, error, 0);
+ goto failed;
+ }
/* Pick the untried server with the lowest RTT. */
- vc->index = vc->server_list->preferred;
- if (test_bit(vc->index, &vc->untried))
+ vc->server_index = vc->server_list->preferred;
+ if (test_bit(vc->server_index, &vc->untried_servers))
goto selected_server;
- vc->index = -1;
- rtt = U32_MAX;
+ vc->server_index = -1;
+ rtt = UINT_MAX;
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
- if (!test_bit(i, &vc->untried) ||
+ if (!test_bit(i, &vc->untried_servers) ||
!test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
continue;
- if (s->probe.rtt < rtt) {
- vc->index = i;
+ if (s->probe.rtt <= rtt) {
+ vc->server_index = i;
rtt = s->probe.rtt;
}
}
- if (vc->index == -1)
+ if (vc->server_index == -1)
goto no_more_servers;
selected_server:
- _debug("use %d", vc->index);
- __clear_bit(vc->index, &vc->untried);
+ _debug("use %d", vc->server_index);
+ __clear_bit(vc->server_index, &vc->untried_servers);
/* We're starting on a different vlserver from the list. We need to
* check it, find its address list and probe its capabilities before we
* use it.
*/
- ASSERTCMP(vc->ac.alist, ==, NULL);
- vlserver = vc->server_list->servers[vc->index].server;
+ vlserver = vc->server_list->servers[vc->server_index].server;
vc->server = vlserver;
_debug("USING VLSERVER: %s", vlserver->name);
@@ -221,34 +244,48 @@ selected_server:
read_lock(&vlserver->lock);
alist = rcu_dereference_protected(vlserver->addresses,
lockdep_is_held(&vlserver->lock));
- afs_get_addrlist(alist);
+ vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
read_unlock(&vlserver->lock);
- memset(&vc->ac, 0, sizeof(vc->ac));
-
- if (!vc->ac.alist)
- vc->ac.alist = alist;
- else
- afs_put_addrlist(alist);
-
- vc->ac.index = -1;
+ vc->addr_tried = 0;
+ vc->addr_index = -1;
iterate_address:
- ASSERT(vc->ac.alist);
/* Iterate over the current server's address list to try and find an
* address on which it will respond to us.
*/
- if (!afs_iterate_addresses(&vc->ac))
+ set = READ_ONCE(alist->responded);
+ failed = READ_ONCE(alist->probe_failed);
+ vc->addr_index = READ_ONCE(alist->preferred);
+
+ _debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);
+
+ set &= ~(failed | vc->addr_tried);
+
+ if (!set)
goto next_server;
- _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+ if (!test_bit(vc->addr_index, &set))
+ vc->addr_index = __ffs(set);
+
+ set_bit(vc->addr_index, &vc->addr_tried);
+ vc->alist = alist;
- _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
+ _debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);
+
+ vc->call_responded = false;
+ _leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
return true;
next_server:
_debug("next");
- afs_end_cursor(&vc->ac);
+ ASSERT(alist);
+ if (vc->call_responded &&
+ vc->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(alist->preferred, vc->addr_index);
+ afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
+ alist = vc->alist = NULL;
goto pick_server;
no_more_servers:
@@ -258,25 +295,26 @@ no_more_servers:
if (vc->flags & AFS_VL_CURSOR_RETRY)
goto restart_from_beginning;
- e.error = -EDESTADDRREQ;
- e.responded = false;
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
- e.responded = true;
- afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+ vc->cumul_error.responded = true;
+ afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
s->probe.abort_code);
}
- error = e.error;
-
-failed_set_error:
- vc->error = error;
failed:
+ if (alist) {
+ if (vc->call_responded &&
+ vc->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(alist->preferred, vc->addr_index);
+ afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
+ alist = vc->alist = NULL;
+ }
vc->flags |= AFS_VL_CURSOR_STOP;
- afs_end_cursor(&vc->ac);
- _leave(" = f [failed %d]", vc->error);
+ _leave(" = f [failed %d]", vc->cumul_error.error);
return false;
}
@@ -285,6 +323,7 @@ failed:
*/
static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
{
+ struct afs_cell *cell = vc->cell;
static int count;
int i;
@@ -294,8 +333,14 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
rcu_read_lock();
pr_notice("EDESTADDR occurred\n");
+ pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
+ pr_notice("DNS: src=%u st=%u lc=%x\n",
+ cell->dns_source, cell->dns_status, cell->dns_lookup_count);
pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
- vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
+ vc->untried_servers, vc->server_index, vc->nr_iterations,
+ vc->flags, vc->cumul_error.error);
+ pr_notice("VC: call er=%d ac=%d r=%u\n",
+ vc->call_error, vc->call_abort_code, vc->call_responded);
if (vc->server_list) {
const struct afs_vlserver_list *sl = vc->server_list;
@@ -312,16 +357,14 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
a->nr_ipv4, a->nr_addrs, a->max_addrs,
a->preferred);
pr_notice("VC: - R=%lx F=%lx\n",
- a->responded, a->failed);
- if (a == vc->ac.alist)
+ a->responded, a->probe_failed);
+ if (a == vc->alist)
pr_notice("VC: - current\n");
}
}
}
- pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
- vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
- vc->ac.responded, vc->ac.nr_iterations);
+ pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
rcu_read_unlock();
}
@@ -332,17 +375,25 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
{
struct afs_net *net = vc->cell->net;
- if (vc->error == -EDESTADDRREQ ||
- vc->error == -EADDRNOTAVAIL ||
- vc->error == -ENETUNREACH ||
- vc->error == -EHOSTUNREACH)
+ _enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
+
+ switch (vc->cumul_error.error) {
+ case -EDESTADDRREQ:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
afs_vl_dump_edestaddrreq(vc);
+ break;
+ }
- afs_end_cursor(&vc->ac);
+ if (vc->alist) {
+ if (vc->call_responded &&
+ vc->addr_index != vc->alist->preferred &&
+ test_bit(vc->alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(vc->alist->preferred, vc->addr_index);
+ afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
+ vc->alist = NULL;
+ }
afs_put_vlserverlist(net, vc->server_list);
-
- if (vc->error == -ECONNABORTED)
- vc->error = afs_abort_to_error(vc->ac.abort_code);
-
- return vc->error;
+ return vc->cumul_error.error;
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 00fca3c66ba6..3a23c0b08eb6 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -18,8 +18,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
{
struct afs_uvldbentry__xdr *uvldb;
struct afs_vldb_entry *entry;
- bool new_only = false;
- u32 tmp, nr_servers, vlflags;
+ u32 nr_servers, vlflags;
int i, ret;
_enter("");
@@ -41,27 +40,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
entry->name[i] = 0;
entry->name_len = strlen(entry->name);
- /* If there is a new replication site that we can use, ignore all the
- * sites that aren't marked as new.
- */
- for (i = 0; i < nr_servers; i++) {
- tmp = ntohl(uvldb->serverFlags[i]);
- if (!(tmp & AFS_VLSF_DONTUSE) &&
- (tmp & AFS_VLSF_NEWREPSITE))
- new_only = true;
- }
-
vlflags = ntohl(uvldb->flags);
for (i = 0; i < nr_servers; i++) {
struct afs_uuid__xdr *xdr;
struct afs_uuid *uuid;
+ u32 tmp = ntohl(uvldb->serverFlags[i]);
int j;
int n = entry->nr_servers;
- tmp = ntohl(uvldb->serverFlags[i]);
- if (tmp & AFS_VLSF_DONTUSE ||
- (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
- continue;
if (tmp & AFS_VLSF_RWVOL) {
entry->fs_mask[n] |= AFS_VOL_VTM_RW;
if (vlflags & AFS_VLF_BACKEXISTS)
@@ -82,6 +68,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
for (j = 0; j < 6; j++)
uuid->node[j] = (u8)ntohl(xdr->node[j]);
+ entry->vlsf_flags[n] = tmp;
entry->addr_version[n] = ntohl(uvldb->serverUnique[i]);
entry->nr_servers++;
}
@@ -106,12 +93,6 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
return 0;
}
-static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
-{
- kfree(call->ret_vldb);
- afs_flat_call_destructor(call);
-}
-
/*
* VL.GetEntryByNameU operation type.
*/
@@ -119,7 +100,7 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = {
.name = "VL.GetEntryByNameU",
.op = afs_VL_GetEntryByNameU,
.deliver = afs_deliver_vl_get_entry_by_name_u,
- .destructor = afs_destroy_vl_get_entry_by_name_u,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -155,6 +136,8 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
call->key = vc->key;
call->ret_vldb = entry;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* Marshall the parameters */
bp = call->request;
@@ -165,8 +148,17 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
memset((void *)bp + volnamesz, 0, padsz);
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (struct afs_vldb_entry *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ afs_put_call(call);
+ if (vc->call_error) {
+ kfree(entry);
+ return ERR_PTR(vc->call_error);
+ }
+ return entry;
}
/*
@@ -208,7 +200,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
count = ntohl(*bp);
nentries = min(nentries, count);
- alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT);
+ alist = afs_alloc_addrlist(nentries);
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
@@ -230,9 +222,13 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
alist = call->ret_alist;
bp = call->buffer;
count = min(call->count, 4U);
- for (i = 0; i < count; i++)
- if (alist->nr_addrs < call->count2)
- afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
+ for (i = 0; i < count; i++) {
+ if (alist->nr_addrs < call->count2) {
+ ret = afs_merge_fs_addr4(call->net, alist, *bp++, AFS_FS_PORT);
+ if (ret < 0)
+ return ret;
+ }
+ }
call->count -= count;
if (call->count > 0)
@@ -245,12 +241,6 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
return 0;
}
-static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
-{
- afs_put_addrlist(call->ret_alist);
- return afs_flat_call_destructor(call);
-}
-
/*
* VL.GetAddrsU operation type.
*/
@@ -258,7 +248,7 @@ static const struct afs_call_type afs_RXVLGetAddrsU = {
.name = "VL.GetAddrsU",
.op = afs_VL_GetAddrsU,
.deliver = afs_deliver_vl_get_addrs_u,
- .destructor = afs_vl_get_addrs_u_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -269,6 +259,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
const uuid_t *uuid)
{
struct afs_ListAddrByAttributes__xdr *r;
+ struct afs_addr_list *alist;
const struct afs_uuid *u = (const struct afs_uuid *)uuid;
struct afs_call *call;
struct afs_net *net = vc->cell->net;
@@ -286,6 +277,8 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
call->key = vc->key;
call->ret_alist = NULL;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* Marshall the parameters */
bp = call->request;
@@ -304,8 +297,18 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
r->uuid.node[i] = htonl(u->node[i]);
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ alist = call->ret_alist;
+ afs_put_call(call);
+ if (vc->call_error) {
+ afs_put_addrlist(alist, afs_alist_trace_put_getaddru);
+ return ERR_PTR(vc->call_error);
+ }
+ return alist;
}
/*
@@ -355,6 +358,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
static void afs_destroy_vl_get_capabilities(struct afs_call *call)
{
+ afs_put_addrlist(call->vl_probe, afs_alist_trace_put_vlgetcaps);
afs_put_vlserver(call->net, call->vlserver);
afs_flat_call_destructor(call);
}
@@ -366,6 +370,7 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
.name = "VL.GetCapabilities",
.op = afs_VL_GetCapabilities,
.deliver = afs_deliver_vl_get_capabilities,
+ .immediate_cancel = afs_vlserver_probe_result,
.done = afs_vlserver_probe_result,
.destructor = afs_destroy_vl_get_capabilities,
};
@@ -378,7 +383,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
* other end supports.
*/
struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
- struct afs_addr_cursor *ac,
+ struct afs_addr_list *alist,
+ unsigned int addr_index,
struct key *key,
struct afs_vlserver *server,
unsigned int server_index)
@@ -395,6 +401,10 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
call->key = key;
call->vlserver = afs_get_vlserver(server);
call->server_index = server_index;
+ call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer);
+ call->vl_probe = afs_get_addrlist(alist, afs_alist_trace_get_vlgetcaps);
+ call->probe_index = addr_index;
+ call->service_id = server->service_id;
call->upgrade = true;
call->async = true;
call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
@@ -405,7 +415,7 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
/* Can't take a ref on server */
trace_afs_make_vl_call(call);
- afs_make_call(ac, call, GFP_KERNEL);
+ afs_make_call(call, GFP_KERNEL);
return call;
}
@@ -450,7 +460,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (call->count > YFS_MAXENDPOINTS)
return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num);
- alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
+ alist = afs_alloc_addrlist(call->count);
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
@@ -488,14 +498,18 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (ntohl(bp[0]) != sizeof(__be32) * 2)
return afs_protocol_error(
call, afs_eproto_yvl_fsendpt4_len);
- afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
+ ret = afs_merge_fs_addr4(call->net, alist, bp[1], ntohl(bp[2]));
+ if (ret < 0)
+ return ret;
bp += 3;
break;
case YFS_ENDPOINT_IPV6:
if (ntohl(bp[0]) != sizeof(__be32) * 5)
return afs_protocol_error(
call, afs_eproto_yvl_fsendpt6_len);
- afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
+ ret = afs_merge_fs_addr6(call->net, alist, bp + 1, ntohl(bp[5]));
+ if (ret < 0)
+ return ret;
bp += 6;
break;
default:
@@ -610,7 +624,7 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
.name = "YFSVL.GetEndpoints",
.op = afs_YFSVL_GetEndpoints,
.deliver = afs_deliver_yfsvl_get_endpoints,
- .destructor = afs_vl_get_addrs_u_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -620,6 +634,7 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
const uuid_t *uuid)
{
+ struct afs_addr_list *alist;
struct afs_call *call;
struct afs_net *net = vc->cell->net;
__be32 *bp;
@@ -635,6 +650,8 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
call->key = vc->key;
call->ret_alist = NULL;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* Marshall the parameters */
bp = call->request;
@@ -643,8 +660,18 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ alist = call->ret_alist;
+ afs_put_call(call);
+ if (vc->call_error) {
+ afs_put_addrlist(alist, afs_alist_trace_put_getaddru);
+ return ERR_PTR(vc->call_error);
+ }
+ return alist;
}
/*
@@ -671,7 +698,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
return ret;
namesz = ntohl(call->tmp);
- if (namesz > AFS_MAXCELLNAME)
+ if (namesz > YFS_VL_MAXCELLNAME)
return afs_protocol_error(call, afs_eproto_cellname_len);
paddedsz = (namesz + 3) & ~3;
call->count = namesz;
@@ -709,12 +736,6 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
return 0;
}
-static void afs_destroy_yfsvl_get_cell_name(struct afs_call *call)
-{
- kfree(call->ret_str);
- afs_flat_call_destructor(call);
-}
-
/*
* VL.GetCapabilities operation type
*/
@@ -722,7 +743,7 @@ static const struct afs_call_type afs_YFSVLGetCellName = {
.name = "YFSVL.GetCellName",
.op = afs_YFSVL_GetCellName,
.deliver = afs_deliver_yfsvl_get_cell_name,
- .destructor = afs_destroy_yfsvl_get_cell_name,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -737,6 +758,7 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
struct afs_call *call;
struct afs_net *net = vc->cell->net;
__be32 *bp;
+ char *cellname;
_enter("");
@@ -747,6 +769,8 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
call->key = vc->key;
call->ret_str = NULL;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* marshall the parameters */
bp = call->request;
@@ -754,6 +778,16 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
/* Can't take a ref on server */
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (char *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ cellname = call->ret_str;
+ afs_put_call(call);
+ if (vc->call_error) {
+ kfree(cellname);
+ return ERR_PTR(vc->call_error);
+ }
+ return cellname;
}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 29d483c80281..0efff3d25133 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -10,6 +10,9 @@
#include "internal.h"
static unsigned __read_mostly afs_volume_record_life = 60 * 60;
+static atomic_t afs_volume_debug_id;
+
+static void afs_destroy_volume(struct work_struct *work);
/*
* Insert a volume into a cell. If there's an existing volume record, that is
@@ -32,8 +35,13 @@ static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
} else if (p->vid > volume->vid) {
pp = &(*pp)->rb_right;
} else {
- volume = afs_get_volume(p, afs_volume_trace_get_cell_insert);
- goto found;
+ if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
+ volume = p;
+ goto found;
+ }
+
+ set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
+ rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
}
}
@@ -52,11 +60,12 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
struct afs_cell *cell = volume->cell;
if (!hlist_unhashed(&volume->proc_link)) {
- trace_afs_volume(volume->vid, refcount_read(&cell->ref),
+ trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref),
afs_volume_trace_remove);
write_seqlock(&cell->volume_lock);
hlist_del_rcu(&volume->proc_link);
- rb_erase(&volume->cell_node, &cell->volumes);
+ if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
+ rb_erase(&volume->cell_node, &cell->volumes);
write_sequnlock(&cell->volume_lock);
}
}
@@ -66,38 +75,49 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
*/
static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
struct afs_vldb_entry *vldb,
- unsigned long type_mask)
+ struct afs_server_list **_slist)
{
struct afs_server_list *slist;
struct afs_volume *volume;
- int ret = -ENOMEM;
+ int ret = -ENOMEM, i;
volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
if (!volume)
goto error_0;
+ volume->debug_id = atomic_inc_return(&afs_volume_debug_id);
volume->vid = vldb->vid[params->type];
volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol);
volume->type = params->type;
volume->type_force = params->force;
volume->name_len = vldb->name_len;
+ volume->creation_time = TIME64_MIN;
+ volume->update_time = TIME64_MIN;
refcount_set(&volume->ref, 1);
INIT_HLIST_NODE(&volume->proc_link);
+ INIT_WORK(&volume->destructor, afs_destroy_volume);
rwlock_init(&volume->servers_lock);
+ mutex_init(&volume->volsync_lock);
+ mutex_init(&volume->cb_check_lock);
rwlock_init(&volume->cb_v_break_lock);
+ INIT_LIST_HEAD(&volume->open_mmaps);
+ init_rwsem(&volume->open_mmaps_lock);
memcpy(volume->name, vldb->name, vldb->name_len + 1);
- slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
+ for (i = 0; i < AFS_MAXTYPES; i++)
+ volume->vids[i] = vldb->vid[i];
+
+ slist = afs_alloc_server_list(volume, params->key, vldb);
if (IS_ERR(slist)) {
ret = PTR_ERR(slist);
goto error_1;
}
- refcount_set(&slist->usage, 1);
+ *_slist = slist;
rcu_assign_pointer(volume->servers, slist);
- trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
+ trace_afs_volume(volume->debug_id, volume->vid, 1, afs_volume_trace_alloc);
return volume;
error_1:
@@ -111,18 +131,20 @@ error_0:
* Look up or allocate a volume record.
*/
static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
- struct afs_vldb_entry *vldb,
- unsigned long type_mask)
+ struct afs_vldb_entry *vldb)
{
+ struct afs_server_list *slist;
struct afs_volume *candidate, *volume;
- candidate = afs_alloc_volume(params, vldb, type_mask);
+ candidate = afs_alloc_volume(params, vldb, &slist);
if (IS_ERR(candidate))
return candidate;
volume = afs_insert_volume_into_cell(params->cell, candidate);
- if (volume != candidate)
- afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup);
+ if (volume == candidate)
+ afs_attach_volume_to_servers(volume, slist);
+ else
+ afs_put_volume(candidate, afs_volume_trace_put_cell_dup);
return volume;
}
@@ -202,8 +224,7 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params)
goto error;
}
- type_mask = 1UL << params->type;
- volume = afs_lookup_volume(params, vldb, type_mask);
+ volume = afs_lookup_volume(params, vldb);
error:
kfree(vldb);
@@ -213,18 +234,22 @@ error:
/*
* Destroy a volume record
*/
-static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
+static void afs_destroy_volume(struct work_struct *work)
{
+ struct afs_volume *volume = container_of(work, struct afs_volume, destructor);
+ struct afs_server_list *slist = rcu_access_pointer(volume->servers);
+
_enter("%p", volume);
#ifdef CONFIG_AFS_FSCACHE
ASSERTCMP(volume->cache, ==, NULL);
#endif
+ afs_detach_volume_from_servers(volume, slist);
afs_remove_volume_from_cell(volume);
- afs_put_serverlist(net, rcu_access_pointer(volume->servers));
+ afs_put_serverlist(volume->cell->net, slist);
afs_put_cell(volume->cell, afs_cell_trace_put_vol);
- trace_afs_volume(volume->vid, refcount_read(&volume->ref),
+ trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref),
afs_volume_trace_free);
kfree_rcu(volume, rcu);
@@ -232,6 +257,20 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
}
/*
+ * Try to get a reference on a volume record.
+ */
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
+{
+ int r;
+
+ if (__refcount_inc_not_zero(&volume->ref, &r)) {
+ trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason);
+ return true;
+ }
+ return false;
+}
+
+/*
* Get a reference on a volume record.
*/
struct afs_volume *afs_get_volume(struct afs_volume *volume,
@@ -241,7 +280,7 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume,
int r;
__refcount_inc(&volume->ref, &r);
- trace_afs_volume(volume->vid, r + 1, reason);
+ trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason);
}
return volume;
}
@@ -250,18 +289,18 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume,
/*
* Drop a reference on a volume record.
*/
-void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
- enum afs_volume_trace reason)
+void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason)
{
if (volume) {
+ unsigned int debug_id = volume->debug_id;
afs_volid_t vid = volume->vid;
bool zero;
int r;
zero = __refcount_dec_and_test(&volume->ref, &r);
- trace_afs_volume(vid, r - 1, reason);
+ trace_afs_volume(debug_id, vid, r - 1, reason);
if (zero)
- afs_destroy_volume(net, volume);
+ schedule_work(&volume->destructor);
}
}
@@ -317,7 +356,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
struct afs_server_list *new, *old, *discard;
struct afs_vldb_entry *vldb;
- char idbuf[16];
+ char idbuf[24];
int ret, idsz;
_enter("");
@@ -325,7 +364,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
/* We look up an ID by passing it as a decimal string in the
* operation's name parameter.
*/
- idsz = sprintf(idbuf, "%llu", volume->vid);
+ idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
if (IS_ERR(vldb)) {
@@ -342,8 +381,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
}
/* See if the volume's server list got updated. */
- new = afs_alloc_server_list(volume->cell, key,
- vldb, (1 << volume->type));
+ new = afs_alloc_server_list(volume, key, vldb);
if (IS_ERR(new)) {
ret = PTR_ERR(new);
goto error_vldb;
@@ -362,11 +400,17 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
discard = old;
}
- volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+ /* Check more often if replication is ongoing. */
+ if (new->ro_replicating)
+ volume->update_at = ktime_get_real_seconds() + 10 * 60;
+ else
+ volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
write_unlock(&volume->servers_lock);
- ret = 0;
+ if (discard == old)
+ afs_reattach_volume_to_servers(volume, new, old);
afs_put_serverlist(volume->cell->net, discard);
+ ret = 0;
error_vldb:
kfree(vldb);
error:
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 19df10d63323..93ad86ff3345 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -12,352 +12,56 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "internal.h"
-static int afs_writepages_region(struct address_space *mapping,
- struct writeback_control *wbc,
- loff_t start, loff_t end, loff_t *_next,
- bool max_one_loop);
-
-static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
- loff_t i_size, bool caching);
-
-#ifdef CONFIG_AFS_FSCACHE
-/*
- * Mark a page as having been made dirty and thus needing writeback. We also
- * need to pin the cache object to write back to.
- */
-bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
- return fscache_dirty_folio(mapping, folio,
- afs_vnode_cache(AFS_FS_I(mapping->host)));
-}
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
- if (caching)
- folio_start_fscache(folio);
-}
-#else
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
-}
-#endif
-
-/*
- * Flush out a conflicting write. This may extend the write to the surrounding
- * pages if also dirty and contiguous to the conflicting region..
- */
-static int afs_flush_conflicting_write(struct address_space *mapping,
- struct folio *folio)
-{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_start = folio_pos(folio),
- .range_end = LLONG_MAX,
- };
- loff_t next;
-
- return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX,
- &next, true);
-}
-
-/*
- * prepare to perform part of a write to a page
- */
-int afs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **_page, void **fsdata)
-{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- struct folio *folio;
- unsigned long priv;
- unsigned f, from;
- unsigned t, to;
- pgoff_t index;
- int ret;
-
- _enter("{%llx:%llu},%llx,%x",
- vnode->fid.vid, vnode->fid.vnode, pos, len);
-
- /* Prefetch area to be written into the cache if we're caching this
- * file. We need to do this before we get a lock on the page in case
- * there's more than one writer competing for the same cache block.
- */
- ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
- if (ret < 0)
- return ret;
-
- index = folio_index(folio);
- from = pos - index * PAGE_SIZE;
- to = from + len;
-
-try_again:
- /* See if this page is already partially written in a way that we can
- * merge the new write with.
- */
- if (folio_test_private(folio)) {
- priv = (unsigned long)folio_get_private(folio);
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- ASSERTCMP(f, <=, t);
-
- if (folio_test_writeback(folio)) {
- trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
- folio_unlock(folio);
- goto wait_for_writeback;
- }
- /* If the file is being filled locally, allow inter-write
- * spaces to be merged into writes. If it's not, only write
- * back what the user gives us.
- */
- if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
- (to < f || from > t))
- goto flush_conflicting_write;
- }
-
- *_page = folio_file_page(folio, pos / PAGE_SIZE);
- _leave(" = 0");
- return 0;
-
- /* The previous write and this write aren't adjacent or overlapping, so
- * flush the page out.
- */
-flush_conflicting_write:
- trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio);
- folio_unlock(folio);
-
- ret = afs_flush_conflicting_write(mapping, folio);
- if (ret < 0)
- goto error;
-
-wait_for_writeback:
- ret = folio_wait_writeback_killable(folio);
- if (ret < 0)
- goto error;
-
- ret = folio_lock_killable(folio);
- if (ret < 0)
- goto error;
- goto try_again;
-
-error:
- folio_put(folio);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * finalise part of a write to a page
- */
-int afs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *subpage, void *fsdata)
-{
- struct folio *folio = page_folio(subpage);
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- unsigned long priv;
- unsigned int f, from = offset_in_folio(folio, pos);
- unsigned int t, to = from + copied;
- loff_t i_size, write_end_pos;
-
- _enter("{%llx:%llu},{%lx}",
- vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
- if (!folio_test_uptodate(folio)) {
- if (copied < len) {
- copied = 0;
- goto out;
- }
-
- folio_mark_uptodate(folio);
- }
-
- if (copied == 0)
- goto out;
-
- write_end_pos = pos + copied;
-
- i_size = i_size_read(&vnode->netfs.inode);
- if (write_end_pos > i_size) {
- write_seqlock(&vnode->cb_lock);
- i_size = i_size_read(&vnode->netfs.inode);
- if (write_end_pos > i_size)
- afs_set_i_size(vnode, write_end_pos);
- write_sequnlock(&vnode->cb_lock);
- fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
- }
-
- if (folio_test_private(folio)) {
- priv = (unsigned long)folio_get_private(folio);
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- if (from < f)
- f = from;
- if (to > t)
- t = to;
- priv = afs_folio_dirty(folio, f, t);
- folio_change_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
- } else {
- priv = afs_folio_dirty(folio, from, to);
- folio_attach_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
- }
-
- if (folio_mark_dirty(folio))
- _debug("dirtied %lx", folio_index(folio));
-
-out:
- folio_unlock(folio);
- folio_put(folio);
- return copied;
-}
-
-/*
- * kill all the pages in the given range
- */
-static void afs_kill_pages(struct address_space *mapping,
- loff_t start, loff_t len)
-{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct folio *folio;
- pgoff_t index = start / PAGE_SIZE;
- pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
- _enter("{%llx:%llu},%llx @%llx",
- vnode->fid.vid, vnode->fid.vnode, len, start);
-
- do {
- _debug("kill %lx (to %lx)", index, last);
-
- folio = filemap_get_folio(mapping, index);
- if (!folio) {
- next = index + 1;
- continue;
- }
-
- next = folio_next_index(folio);
-
- folio_clear_uptodate(folio);
- folio_end_writeback(folio);
- folio_lock(folio);
- generic_error_remove_page(mapping, &folio->page);
- folio_unlock(folio);
- folio_put(folio);
-
- } while (index = next, index <= last);
-
- _leave("");
-}
-
-/*
- * Redirty all the pages in a given range.
- */
-static void afs_redirty_pages(struct writeback_control *wbc,
- struct address_space *mapping,
- loff_t start, loff_t len)
-{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct folio *folio;
- pgoff_t index = start / PAGE_SIZE;
- pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
- _enter("{%llx:%llu},%llx @%llx",
- vnode->fid.vid, vnode->fid.vnode, len, start);
-
- do {
- _debug("redirty %llx @%llx", len, start);
-
- folio = filemap_get_folio(mapping, index);
- if (!folio) {
- next = index + 1;
- continue;
- }
-
- next = index + folio_nr_pages(folio);
- folio_redirty_for_writepage(wbc, folio);
- folio_end_writeback(folio);
- folio_put(folio);
- } while (index = next, index <= last);
-
- _leave("");
-}
-
/*
* completion of write to server
*/
static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
{
- struct address_space *mapping = vnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t end;
-
- XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
-
_enter("{%llx:%llu},{%x @%llx}",
vnode->fid.vid, vnode->fid.vnode, len, start);
- rcu_read_lock();
-
- end = (start + len - 1) / PAGE_SIZE;
- xas_for_each(&xas, folio, end) {
- if (!folio_test_writeback(folio)) {
- kdebug("bad %x @%llx page %lx %lx",
- len, start, folio_index(folio), end);
- ASSERT(folio_test_writeback(folio));
- }
-
- trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
- folio_detach_private(folio);
- folio_end_writeback(folio);
- }
-
- rcu_read_unlock();
-
afs_prune_wb_keys(vnode);
_leave("");
}
/*
* Find a key to use for the writeback. We cached the keys used to author the
- * writes on the vnode. *_wbk will contain the last writeback key used or NULL
- * and we need to start from there if it's set.
+ * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key
+ * record used or NULL and we need to start from there if it's set.
+ * wreq->netfs_priv will be set to the key itself or NULL.
*/
-static int afs_get_writeback_key(struct afs_vnode *vnode,
- struct afs_wb_key **_wbk)
+static void afs_get_writeback_key(struct netfs_io_request *wreq)
{
- struct afs_wb_key *wbk = NULL;
- struct list_head *p;
- int ret = -ENOKEY, ret2;
+ struct afs_wb_key *wbk, *old = wreq->netfs_priv2;
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+
+ key_put(wreq->netfs_priv);
+ wreq->netfs_priv = NULL;
+ wreq->netfs_priv2 = NULL;
spin_lock(&vnode->wb_lock);
- if (*_wbk)
- p = (*_wbk)->vnode_link.next;
+ if (old)
+ wbk = list_next_entry(old, vnode_link);
else
- p = vnode->wb_keys.next;
+ wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link);
- while (p != &vnode->wb_keys) {
- wbk = list_entry(p, struct afs_wb_key, vnode_link);
+ list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) {
_debug("wbk %u", key_serial(wbk->key));
- ret2 = key_validate(wbk->key);
- if (ret2 == 0) {
+ if (key_validate(wbk->key) == 0) {
refcount_inc(&wbk->usage);
+ wreq->netfs_priv = key_get(wbk->key);
+ wreq->netfs_priv2 = wbk;
_debug("USE WB KEY %u", key_serial(wbk->key));
break;
}
-
- wbk = NULL;
- if (ret == -ENOKEY)
- ret = ret2;
- p = p->next;
}
spin_unlock(&vnode->wb_lock);
- if (*_wbk)
- afs_put_wb_key(*_wbk);
- *_wbk = wbk;
- return 0;
+
+ afs_put_wb_key(old);
}
static void afs_store_data_success(struct afs_operation *op)
@@ -366,9 +70,8 @@ static void afs_store_data_success(struct afs_operation *op)
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
- if (op->error == 0) {
- if (!op->store.laundering)
- afs_pages_written_back(vnode, op->store.pos, op->store.size);
+ if (!afs_op_error(op)) {
+ afs_pages_written_back(vnode, op->store.pos, op->store.size);
afs_stat_v(vnode, n_stores);
atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
}
@@ -381,428 +84,153 @@ static const struct afs_operation_ops afs_store_data_operation = {
};
/*
- * write to a file
+ * Prepare a subrequest to write to the server. This sets the max_len
+ * parameter.
*/
-static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
- bool laundering)
+void afs_prepare_write(struct netfs_io_subrequest *subreq)
{
+ struct netfs_io_stream *stream = &subreq->rreq->io_streams[subreq->stream_nr];
+
+ //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags))
+ // subreq->max_len = 512 * 1024;
+ //else
+ stream->sreq_max_len = 256 * 1024 * 1024;
+}
+
+/*
+ * Issue a subrequest to write to the server.
+ */
+static void afs_issue_write_worker(struct work_struct *work)
+{
+ struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work);
+ struct netfs_io_request *wreq = subreq->rreq;
struct afs_operation *op;
- struct afs_wb_key *wbk = NULL;
- loff_t size = iov_iter_count(iter);
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+ unsigned long long pos = subreq->start + subreq->transferred;
+ size_t len = subreq->len - subreq->transferred;
int ret = -ENOKEY;
- _enter("%s{%llx:%llu.%u},%llx,%llx",
+ _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx",
+ wreq->debug_id, subreq->debug_index,
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- size, pos);
+ pos, len);
- ret = afs_get_writeback_key(vnode, &wbk);
- if (ret) {
- _leave(" = %d [no keys]", ret);
- return ret;
- }
+#if 0 // Error injection
+ if (subreq->debug_index == 3)
+ return netfs_write_subrequest_terminated(subreq, -ENOANO);
- op = afs_alloc_operation(wbk->key, vnode->volume);
- if (IS_ERR(op)) {
- afs_put_wb_key(wbk);
- return -ENOMEM;
+ if (!subreq->retry_count) {
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
}
+#endif
+
+ op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
+ if (IS_ERR(op))
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
afs_op_set_vnode(op, 0, vnode);
- op->file[0].dv_delta = 1;
+ op->file[0].dv_delta = 1;
op->file[0].modification = true;
- op->store.write_iter = iter;
- op->store.pos = pos;
- op->store.size = size;
- op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
- op->store.laundering = laundering;
- op->mtime = vnode->netfs.inode.i_mtime;
- op->flags |= AFS_OPERATION_UNINTR;
- op->ops = &afs_store_data_operation;
+ op->store.pos = pos;
+ op->store.size = len;
+ op->flags |= AFS_OPERATION_UNINTR;
+ op->ops = &afs_store_data_operation;
-try_next_key:
afs_begin_vnode_operation(op);
- afs_wait_for_operation(op);
- switch (op->error) {
+ op->store.write_iter = &subreq->io_iter;
+ op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size);
+ op->mtime = inode_get_mtime(&vnode->netfs.inode);
+
+ afs_wait_for_operation(op);
+ ret = afs_put_operation(op);
+ switch (ret) {
+ case 0:
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ break;
case -EACCES:
case -EPERM:
case -ENOKEY:
case -EKEYEXPIRED:
case -EKEYREJECTED:
case -EKEYREVOKED:
- _debug("next");
-
- ret = afs_get_writeback_key(vnode, &wbk);
- if (ret == 0) {
- key_put(op->key);
- op->key = key_get(wbk->key);
- goto try_next_key;
- }
+ /* If there are more keys we can try, use the retry algorithm
+ * to rotate the keys.
+ */
+ if (wreq->netfs_priv2)
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
break;
}
- afs_put_wb_key(wbk);
- _leave(" = %d", op->error);
- return afs_put_operation(op);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len);
+}
+
+void afs_issue_write(struct netfs_io_subrequest *subreq)
+{
+ subreq->work.func = afs_issue_write_worker;
+ if (!queue_work(system_dfl_wq, &subreq->work))
+ WARN_ON_ONCE(1);
}
/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- *
- * If this page holds new content, then we can include filler zeros in the
- * writeback.
+ * Writeback calls this when it finds a folio that needs uploading. This isn't
+ * called if writeback only has copy-to-cache to deal with.
*/
-static void afs_extend_writeback(struct address_space *mapping,
- struct afs_vnode *vnode,
- long *_count,
- loff_t start,
- loff_t max_len,
- bool new_content,
- bool caching,
- unsigned int *_len)
+void afs_begin_writeback(struct netfs_io_request *wreq)
{
- struct pagevec pvec;
- struct folio *folio;
- unsigned long priv;
- unsigned int psize, filler = 0;
- unsigned int f, t;
- loff_t len = *_len;
- pgoff_t index = (start + len) / PAGE_SIZE;
- bool stop = true;
- unsigned int i;
-
- XA_STATE(xas, &mapping->i_pages, index);
- pagevec_init(&pvec);
-
- do {
- /* Firstly, we gather up a batch of contiguous dirty pages
- * under the RCU read lock - but we can't clear the dirty flags
- * there if any of those pages are mapped.
- */
- rcu_read_lock();
-
- xas_for_each(&xas, folio, ULONG_MAX) {
- stop = true;
- if (xas_retry(&xas, folio))
- continue;
- if (xa_is_value(folio))
- break;
- if (folio_index(folio) != index)
- break;
-
- if (!folio_try_get_rcu(folio)) {
- xas_reset(&xas);
- continue;
- }
-
- /* Has the page moved or been split? */
- if (unlikely(folio != xas_reload(&xas))) {
- folio_put(folio);
- break;
- }
-
- if (!folio_trylock(folio)) {
- folio_put(folio);
- break;
- }
- if (!folio_test_dirty(folio) ||
- folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- folio_put(folio);
- break;
- }
-
- psize = folio_size(folio);
- priv = (unsigned long)folio_get_private(folio);
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- if (f != 0 && !new_content) {
- folio_unlock(folio);
- folio_put(folio);
- break;
- }
-
- len += filler + t;
- filler = psize - t;
- if (len >= max_len || *_count <= 0)
- stop = true;
- else if (t == psize || new_content)
- stop = false;
-
- index += folio_nr_pages(folio);
- if (!pagevec_add(&pvec, &folio->page))
- break;
- if (stop)
- break;
- }
-
- if (!stop)
- xas_pause(&xas);
- rcu_read_unlock();
-
- /* Now, if we obtained any pages, we can shift them to being
- * writable and mark them for caching.
- */
- if (!pagevec_count(&pvec))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- folio = page_folio(pvec.pages[i]);
- trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
-
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- if (folio_start_writeback(folio))
- BUG();
- afs_folio_start_fscache(caching, folio);
-
- *_count -= folio_nr_pages(folio);
- folio_unlock(folio);
- }
-
- pagevec_release(&pvec);
- cond_resched();
- } while (!stop);
-
- *_len = len;
+ if (S_ISREG(wreq->inode->i_mode))
+ afs_get_writeback_key(wreq);
}
/*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
+ * Prepare to retry the writes in request. Use this to try rotating the
+ * available writeback keys.
*/
-static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
- struct writeback_control *wbc,
- struct folio *folio,
- loff_t start, loff_t end)
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream)
{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct iov_iter iter;
- unsigned long priv;
- unsigned int offset, to, len, max_len;
- loff_t i_size = i_size_read(&vnode->netfs.inode);
- bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
- long count = wbc->nr_to_write;
- int ret;
-
- _enter(",%lx,%llx-%llx", folio_index(folio), start, end);
-
- if (folio_start_writeback(folio))
- BUG();
- afs_folio_start_fscache(caching, folio);
-
- count -= folio_nr_pages(folio);
-
- /* Find all consecutive lockable dirty pages that have contiguous
- * written regions, stopping when we find a page that is not
- * immediately lockable, is not dirty or is missing, or we reach the
- * end of the range.
- */
- priv = (unsigned long)folio_get_private(folio);
- offset = afs_folio_dirty_from(folio, priv);
- to = afs_folio_dirty_to(folio, priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
-
- len = to - offset;
- start += offset;
- if (start < i_size) {
- /* Trim the write to the EOF; the extra data is ignored. Also
- * put an upper limit on the size of a single storedata op.
- */
- max_len = 65536 * 4096;
- max_len = min_t(unsigned long long, max_len, end - start + 1);
- max_len = min_t(unsigned long long, max_len, i_size - start);
-
- if (len < max_len &&
- (to == folio_size(folio) || new_content))
- afs_extend_writeback(mapping, vnode, &count,
- start, max_len, new_content,
- caching, &len);
- len = min_t(loff_t, len, max_len);
- }
-
- /* We now have a contiguous set of dirty pages, each with writeback
- * set; the first page is still locked at this point, but all the rest
- * have been unlocked.
- */
- folio_unlock(folio);
-
- if (start < i_size) {
- _debug("write back %x @%llx [%llx]", len, start, i_size);
-
- /* Speculatively write to the cache. We have to fix this up
- * later if the store fails.
- */
- afs_write_to_cache(vnode, start, len, i_size, caching);
-
- iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
- ret = afs_store_data(vnode, &iter, start, false);
- } else {
- _debug("write discard %x @%llx [%llx]", len, start, i_size);
-
- /* The dirty region was entirely beyond the EOF. */
- fscache_clear_page_bits(mapping, start, len, caching);
- afs_pages_written_back(vnode, start, len);
- ret = 0;
- }
-
- switch (ret) {
- case 0:
- wbc->nr_to_write = count;
- ret = len;
+ struct netfs_io_subrequest *subreq =
+ list_first_entry(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+
+ switch (wreq->origin) {
+ case NETFS_READAHEAD:
+ case NETFS_READPAGE:
+ case NETFS_READ_GAPS:
+ case NETFS_READ_SINGLE:
+ case NETFS_READ_FOR_WRITE:
+ case NETFS_UNBUFFERED_READ:
+ case NETFS_DIO_READ:
+ return;
+ default:
break;
+ }
- default:
- pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
- fallthrough;
+ switch (subreq->error) {
case -EACCES:
case -EPERM:
case -ENOKEY:
case -EKEYEXPIRED:
case -EKEYREJECTED:
case -EKEYREVOKED:
- case -ENETRESET:
- afs_redirty_pages(wbc, mapping, start, len);
- mapping_set_error(mapping, ret);
- break;
-
- case -EDQUOT:
- case -ENOSPC:
- afs_redirty_pages(wbc, mapping, start, len);
- mapping_set_error(mapping, -ENOSPC);
- break;
-
- case -EROFS:
- case -EIO:
- case -EREMOTEIO:
- case -EFBIG:
- case -ENOENT:
- case -ENOMEDIUM:
- case -ENXIO:
- trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
- afs_kill_pages(mapping, start, len);
- mapping_set_error(mapping, ret);
+ afs_get_writeback_key(wreq);
+ if (!wreq->netfs_priv)
+ stream->failed = true;
break;
}
-
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * write a region of pages back to the server
- */
-static int afs_writepages_region(struct address_space *mapping,
- struct writeback_control *wbc,
- loff_t start, loff_t end, loff_t *_next,
- bool max_one_loop)
-{
- struct folio *folio;
- struct page *head_page;
- ssize_t ret;
- int n, skips = 0;
-
- _enter("%llx,%llx,", start, end);
-
- do {
- pgoff_t index = start / PAGE_SIZE;
-
- n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
- PAGECACHE_TAG_DIRTY, 1, &head_page);
- if (!n)
- break;
-
- folio = page_folio(head_page);
- start = folio_pos(folio); /* May regress with THPs */
-
- _debug("wback %lx", folio_index(folio));
-
- /* At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
- */
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0) {
- folio_put(folio);
- return ret;
- }
- } else {
- if (!folio_trylock(folio)) {
- folio_put(folio);
- return 0;
- }
- }
-
- if (folio_mapping(folio) != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- folio_put(folio);
- continue;
- }
-
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode != WB_SYNC_NONE) {
- folio_wait_writeback(folio);
-#ifdef CONFIG_AFS_FSCACHE
- folio_wait_fscache(folio);
-#endif
- } else {
- start += folio_size(folio);
- }
- folio_put(folio);
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (skips >= 5 || need_resched())
- break;
- skips++;
- }
- continue;
- }
-
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
- folio_put(folio);
- if (ret < 0) {
- _leave(" = %zd", ret);
- return ret;
- }
-
- start += ret;
-
- if (max_one_loop)
- break;
-
- cond_resched();
- } while (wbc->nr_to_write > 0);
-
- *_next = start;
- _leave(" = 0 [%llx]", *_next);
- return 0;
}
/*
* write some of the pending data back to the server
*/
-int afs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+int afs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- loff_t start, next;
int ret;
- _enter("");
-
/* We have to be careful as we can end up racing with setattr()
* truncating the pagecache since the caller doesn't take a lock here
* to prevent it.
@@ -812,69 +240,12 @@ int afs_writepages(struct address_space *mapping,
else if (!down_read_trylock(&vnode->validate_lock))
return 0;
- if (wbc->range_cyclic) {
- start = mapping->writeback_index * PAGE_SIZE;
- ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX,
- &next, false);
- if (ret == 0) {
- mapping->writeback_index = next / PAGE_SIZE;
- if (start > 0 && wbc->nr_to_write > 0) {
- ret = afs_writepages_region(mapping, wbc, 0,
- start, &next, false);
- if (ret == 0)
- mapping->writeback_index =
- next / PAGE_SIZE;
- }
- }
- } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
- ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX,
- &next, false);
- if (wbc->nr_to_write > 0 && ret == 0)
- mapping->writeback_index = next / PAGE_SIZE;
- } else {
- ret = afs_writepages_region(mapping, wbc,
- wbc->range_start, wbc->range_end,
- &next, false);
- }
-
+ ret = netfs_writepages(mapping, wbc);
up_read(&vnode->validate_lock);
- _leave(" = %d", ret);
return ret;
}
/*
- * write to an AFS file
- */
-ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
-{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
- struct afs_file *af = iocb->ki_filp->private_data;
- ssize_t result;
- size_t count = iov_iter_count(from);
-
- _enter("{%llx:%llu},{%zu},",
- vnode->fid.vid, vnode->fid.vnode, count);
-
- if (IS_SWAPFILE(&vnode->netfs.inode)) {
- printk(KERN_INFO
- "AFS: Attempt to write to active swap file!\n");
- return -EBUSY;
- }
-
- if (!count)
- return 0;
-
- result = afs_validate(vnode, af->key);
- if (result < 0)
- return result;
-
- result = generic_file_write_iter(iocb, from);
-
- _leave(" = %zd", result);
- return result;
-}
-
-/*
* flush any dirty pages for this process, and check for write errors.
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
@@ -902,59 +273,11 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
{
- struct folio *folio = page_folio(vmf->page);
struct file *file = vmf->vma->vm_file;
- struct inode *inode = file_inode(file);
- struct afs_vnode *vnode = AFS_FS_I(inode);
- struct afs_file *af = file->private_data;
- unsigned long priv;
- vm_fault_t ret = VM_FAULT_RETRY;
-
- _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
- afs_validate(vnode, af->key);
-
- sb_start_pagefault(inode->i_sb);
-
- /* Wait for the page to be written to the cache before we allow it to
- * be modified. We then assume the entire page will need writing back.
- */
-#ifdef CONFIG_AFS_FSCACHE
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0)
- goto out;
-#endif
-
- if (folio_wait_writeback_killable(folio))
- goto out;
-
- if (folio_lock_killable(folio) < 0)
- goto out;
- /* We mustn't change folio->private until writeback is complete as that
- * details the portion of the page we need to write back and we might
- * need to redirty the page if there's a problem.
- */
- if (folio_wait_writeback_killable(folio) < 0) {
- folio_unlock(folio);
- goto out;
- }
-
- priv = afs_folio_dirty(folio, 0, folio_size(folio));
- priv = afs_folio_dirty_mmapped(priv);
- if (folio_test_private(folio)) {
- folio_change_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
- } else {
- folio_attach_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
- }
- file_update_time(file);
-
- ret = VM_FAULT_LOCKED;
-out:
- sb_end_pagefault(inode->i_sb);
- return ret;
+ if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0)
+ return VM_FAULT_SIGBUS;
+ return netfs_page_mkwrite(vmf, NULL);
}
/*
@@ -984,66 +307,3 @@ void afs_prune_wb_keys(struct afs_vnode *vnode)
afs_put_wb_key(wbk);
}
}
-
-/*
- * Clean up a page during invalidation.
- */
-int afs_launder_folio(struct folio *folio)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
- struct iov_iter iter;
- struct bio_vec bv[1];
- unsigned long priv;
- unsigned int f, t;
- int ret = 0;
-
- _enter("{%lx}", folio->index);
-
- priv = (unsigned long)folio_get_private(folio);
- if (folio_clear_dirty_for_io(folio)) {
- f = 0;
- t = folio_size(folio);
- if (folio_test_private(folio)) {
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- }
-
- bv[0].bv_page = &folio->page;
- bv[0].bv_offset = f;
- bv[0].bv_len = t - f;
- iov_iter_bvec(&iter, ITER_SOURCE, bv, 1, bv[0].bv_len);
-
- trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
- ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
- }
-
- trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
- folio_detach_private(folio);
- folio_wait_fscache(folio);
- return ret;
-}
-
-/*
- * Deal with the completion of writing the data to the cache.
- */
-static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
- bool was_async)
-{
- struct afs_vnode *vnode = priv;
-
- if (IS_ERR_VALUE(transferred_or_error) &&
- transferred_or_error != -ENOBUFS)
- afs_invalidate_cache(vnode, 0);
-}
-
-/*
- * Save the write to the cache also.
- */
-static void afs_write_to_cache(struct afs_vnode *vnode,
- loff_t start, size_t len, loff_t i_size,
- bool caching)
-{
- fscache_write_to_cache(afs_vnode_cache(vnode),
- vnode->netfs.inode.i_mapping, start, len, i_size,
- afs_write_to_cache_done, vnode, caching);
-}
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 7751b0b3f81d..e19f396aa370 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -75,7 +75,7 @@ static bool afs_make_acl(struct afs_operation *op,
{
struct afs_acl *acl;
- acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
+ acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
if (!acl) {
afs_op_nomem(op);
return false;
@@ -97,7 +97,7 @@ static const struct afs_operation_ops afs_store_acl_operation = {
* Set a file's AFS3 ACL.
*/
static int afs_xattr_set_acl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct inode *inode, const char *name,
const void *buffer, size_t size, int flags)
@@ -228,7 +228,7 @@ static const struct afs_operation_ops yfs_store_opaque_acl2_operation = {
* Set a file's YFS ACL.
*/
static int afs_xattr_set_yfs(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct inode *inode, const char *name,
const void *buffer, size_t size, int flags)
@@ -353,7 +353,7 @@ static const struct xattr_handler afs_xattr_afs_volume_handler = {
.get = afs_xattr_get_volume,
};
-const struct xattr_handler *afs_xattr_handlers[] = {
+const struct xattr_handler * const afs_xattr_handlers[] = {
&afs_xattr_afs_acl_handler,
&afs_xattr_afs_cell_handler,
&afs_xattr_afs_fid_handler,
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
index 8ca868164507..cc5f143d21a3 100644
--- a/fs/afs/xdr_fs.h
+++ b/fs/afs/xdr_fs.h
@@ -88,7 +88,7 @@ union afs_xdr_dir_block {
struct {
struct afs_xdr_dir_hdr hdr;
- u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS];
+ u8 alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR];
__be16 hashtable[AFS_DIR_HASHTBL_SIZE];
} meta;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 11571cca86c1..febf13a49f0b 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -245,12 +245,15 @@ static void xdr_decode_YFSVolSync(const __be32 **_bp,
struct afs_volsync *volsync)
{
struct yfs_xdr_YFSVolSync *x = (void *)*_bp;
- u64 creation;
+ u64 creation, update;
if (volsync) {
creation = xdr_to_u64(x->vol_creation_date);
do_div(creation, 10 * 1000 * 1000);
volsync->creation = creation;
+ update = xdr_to_u64(x->vol_update_date);
+ do_div(update, 10 * 1000 * 1000);
+ volsync->update = update;
}
*_bp += xdr_size(x);
@@ -349,18 +352,19 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call)
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
{
struct afs_operation *op = call->op;
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
const __be32 *bp;
+ size_t count_before;
int ret;
_enter("{%u,%zu, %zu/%llu}",
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
- req->actual_len);
+ call->remaining);
switch (call->unmarshall) {
case 0:
- req->actual_len = 0;
+ call->remaining = 0;
afs_extract_to_tmp64(call);
call->unmarshall++;
fallthrough;
@@ -375,38 +379,39 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
if (ret < 0)
return ret;
- req->actual_len = be64_to_cpu(call->tmp64);
- _debug("DATA length: %llu", req->actual_len);
+ call->remaining = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", call->remaining);
- if (req->actual_len == 0)
+ if (call->remaining == 0)
goto no_more_data;
- call->iter = req->iter;
- call->iov_len = min(req->actual_len, req->len);
+ call->iter = &subreq->io_iter;
+ call->iov_len = min(call->remaining, subreq->len - subreq->transferred);
call->unmarshall++;
fallthrough;
/* extract the returned data */
case 2:
- _debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->actual_len);
+ count_before = call->iov_len;
+ _debug("extract data %zu/%llu", count_before, call->remaining);
ret = afs_extract_data(call, true);
+ subreq->transferred += count_before - call->iov_len;
if (ret < 0)
return ret;
call->iter = &call->def_iter;
- if (req->actual_len <= req->len)
+ if (call->remaining)
goto no_more_data;
/* Discard any excess data the server gave us */
- afs_extract_discard(call, req->actual_len - req->len);
+ afs_extract_discard(call, call->remaining);
call->unmarshall = 3;
fallthrough;
case 3:
_debug("extract discard %zu/%llu",
- iov_iter_count(call->iter), req->actual_len - req->len);
+ iov_iter_count(call->iter), call->remaining);
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -431,8 +436,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
xdr_decode_YFSCallBack(&bp, call, &vp->scb);
xdr_decode_YFSVolSync(&bp, &op->volsync);
- req->data_version = vp->scb.status.data_version;
- req->file_size = vp->scb.status.size;
+ if (subreq->start + subreq->transferred >= vp->scb.status.size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
call->unmarshall++;
fallthrough;
@@ -451,7 +456,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
static const struct afs_call_type yfs_RXYFSFetchData64 = {
.name = "YFS.FetchData64",
.op = yfs_FS_FetchData64,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = yfs_deliver_fs_fetch_data64,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
@@ -460,14 +467,15 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = {
*/
void yfs_fs_fetch_data(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
struct afs_call *call;
__be32 *bp;
- _enter(",%x,{%llx:%llu},%llx,%llx",
+ _enter(",%x,{%llx:%llu},%llx,%zx",
key_serial(op->key), vp->fid.vid, vp->fid.vnode,
- req->pos, req->len);
+ subreq->start + subreq->transferred,
+ subreq->len - subreq->transferred);
call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64,
sizeof(__be32) * 2 +
@@ -479,17 +487,19 @@ void yfs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- req->call_debug_id = call->debug_id;
+ if (op->flags & AFS_OPERATION_ASYNC)
+ call->async = true;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSFETCHDATA64);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &vp->fid);
- bp = xdr_encode_u64(bp, req->pos);
- bp = xdr_encode_u64(bp, req->len);
+ bp = xdr_encode_u64(bp, subreq->start + subreq->transferred);
+ bp = xdr_encode_u64(bp, subreq->len - subreq->transferred);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -572,6 +582,7 @@ void yfs_fs_create_file(struct afs_operation *op)
bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -620,6 +631,7 @@ void yfs_fs_make_dir(struct afs_operation *op)
bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -655,8 +667,9 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call)
static void yfs_done_fs_remove_file2(struct afs_call *call)
{
if (call->error == -ECONNABORTED &&
- call->abort_code == RX_INVALID_OPERATION) {
- set_bit(AFS_SERVER_FL_NO_RM2, &call->server->flags);
+ (call->abort_code == RX_INVALID_OPERATION ||
+ call->abort_code == RXGEN_OPCODE)) {
+ set_bit(AFS_SERVER_FL_NO_RM2, &call->op->server->flags);
call->op->flags |= AFS_OPERATION_DOWNGRADE;
}
}
@@ -704,6 +717,7 @@ void yfs_fs_remove_file2(struct afs_operation *op)
bp = xdr_encode_name(bp, name);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -773,6 +787,7 @@ void yfs_fs_remove_file(struct afs_operation *op)
bp = xdr_encode_name(bp, name);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -814,6 +829,7 @@ void yfs_fs_remove_dir(struct afs_operation *op)
bp = xdr_encode_name(bp, name);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -887,6 +903,7 @@ void yfs_fs_link(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call1(call, &vp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -968,6 +985,7 @@ void yfs_fs_symlink(struct afs_operation *op)
bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1024,6 +1042,9 @@ void yfs_fs_rename(struct afs_operation *op)
_enter("");
+ if (!test_bit(AFS_SERVER_FL_NO_RENAME2, &op->server->flags))
+ return yfs_fs_rename_replace(op);
+
call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename,
sizeof(__be32) +
sizeof(struct yfs_xdr_RPCFlags) +
@@ -1047,6 +1068,253 @@ void yfs_fs_rename(struct afs_operation *op)
bp = xdr_encode_name(bp, new_name);
yfs_check_req(call, bp);
+ call->fid = orig_dvp->fid;
+ trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+ afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename_NoReplace operation. This does not
+ * return the status of a displaced target inode as there cannot be one.
+ */
+static int yfs_deliver_fs_rename_1(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ struct afs_vnode_param *old_vp = &op->more_files[0];
+ const __be32 *bp;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ /* If the two dirs are the same, we have two copies of the same status
+ * report, so we just decode it twice.
+ */
+ xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb);
+ xdr_decode_YFSFid(&bp, &old_vp->fid);
+ xdr_decode_YFSFetchStatus(&bp, call, &old_vp->scb);
+ xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb);
+ xdr_decode_YFSVolSync(&bp, &op->volsync);
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Deliver reply data to a YFS.Rename_Replace or a YFS.Rename_Exchange
+ * operation. These return the status of the displaced target inode if there
+ * was one.
+ */
+static int yfs_deliver_fs_rename_2(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ struct afs_vnode_param *old_vp = &op->more_files[0];
+ struct afs_vnode_param *new_vp = &op->more_files[1];
+ const __be32 *bp;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ /* If the two dirs are the same, we have two copies of the same status
+ * report, so we just decode it twice.
+ */
+ xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb);
+ xdr_decode_YFSFid(&bp, &old_vp->fid);
+ xdr_decode_YFSFetchStatus(&bp, call, &old_vp->scb);
+ xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb);
+ xdr_decode_YFSFid(&bp, &new_vp->fid);
+ xdr_decode_YFSFetchStatus(&bp, call, &new_vp->scb);
+ xdr_decode_YFSVolSync(&bp, &op->volsync);
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+static void yfs_done_fs_rename_replace(struct afs_call *call)
+{
+ if (call->error == -ECONNABORTED &&
+ (call->abort_code == RX_INVALID_OPERATION ||
+ call->abort_code == RXGEN_OPCODE)) {
+ set_bit(AFS_SERVER_FL_NO_RENAME2, &call->op->server->flags);
+ call->op->flags |= AFS_OPERATION_DOWNGRADE;
+ }
+}
+
+/*
+ * YFS.Rename_Replace operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename_Replace = {
+ .name = "FS.Rename_Replace",
+ .op = yfs_FS_Rename_Replace,
+ .deliver = yfs_deliver_fs_rename_2,
+ .done = yfs_done_fs_rename_replace,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * YFS.Rename_NoReplace operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename_NoReplace = {
+ .name = "FS.Rename_NoReplace",
+ .op = yfs_FS_Rename_NoReplace,
+ .deliver = yfs_deliver_fs_rename_1,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * YFS.Rename_Exchange operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename_Exchange = {
+ .name = "FS.Rename_Exchange",
+ .op = yfs_FS_Rename_Exchange,
+ .deliver = yfs_deliver_fs_rename_2,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory, replacing the target if it exists. The status
+ * of a displaced target is returned.
+ */
+void yfs_fs_rename_replace(struct afs_operation *op)
+{
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ const struct qstr *orig_name = &op->dentry->d_name;
+ const struct qstr *new_name = &op->dentry_2->d_name;
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename_Replace,
+ sizeof(__be32) +
+ sizeof(struct yfs_xdr_RPCFlags) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(orig_name->len) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(new_name->len),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return afs_op_nomem(op);
+
+ /* Marshall the parameters. */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSRENAME_REPLACE);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+ bp = xdr_encode_name(bp, orig_name);
+ bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+ bp = xdr_encode_name(bp, new_name);
+ yfs_check_req(call, bp);
+
+ call->fid = orig_dvp->fid;
+ trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+ afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Rename a file or directory, failing if the target dirent exists.
+ */
+void yfs_fs_rename_noreplace(struct afs_operation *op)
+{
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ const struct qstr *orig_name = &op->dentry->d_name;
+ const struct qstr *new_name = &op->dentry_2->d_name;
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename_NoReplace,
+ sizeof(__be32) +
+ sizeof(struct yfs_xdr_RPCFlags) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(orig_name->len) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(new_name->len),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return afs_op_nomem(op);
+
+ /* Marshall the parameters. */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSRENAME_NOREPLACE);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+ bp = xdr_encode_name(bp, orig_name);
+ bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+ bp = xdr_encode_name(bp, new_name);
+ yfs_check_req(call, bp);
+
+ call->fid = orig_dvp->fid;
+ trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+ afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Exchange a pair of files directories.
+ */
+void yfs_fs_rename_exchange(struct afs_operation *op)
+{
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ const struct qstr *orig_name = &op->dentry->d_name;
+ const struct qstr *new_name = &op->dentry_2->d_name;
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename_Exchange,
+ sizeof(__be32) +
+ sizeof(struct yfs_xdr_RPCFlags) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(orig_name->len) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(new_name->len),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return afs_op_nomem(op);
+
+ /* Marshall the parameters. */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSRENAME_EXCHANGE);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+ bp = xdr_encode_name(bp, orig_name);
+ bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+ bp = xdr_encode_name(bp, new_name);
+ yfs_check_req(call, bp);
+
+ call->fid = orig_dvp->fid;
trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1102,6 +1370,7 @@ void yfs_fs_store_data(struct afs_operation *op)
bp = xdr_encode_u64(bp, op->store.i_size);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1158,6 +1427,7 @@ static void yfs_fs_setattr_size(struct afs_operation *op)
bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1196,6 +1466,7 @@ void yfs_fs_setattr(struct afs_operation *op)
bp = xdr_encode_YFS_StoreStatus(bp, attr);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1366,6 +1637,7 @@ void yfs_fs_get_volume_status(struct afs_operation *op)
bp = xdr_encode_u64(bp, vp->fid.vid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1430,6 +1702,7 @@ void yfs_fs_set_lock(struct afs_operation *op)
bp = xdr_encode_u32(bp, op->lock.type);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1460,6 +1733,7 @@ void yfs_fs_extend_lock(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1490,6 +1764,7 @@ void yfs_fs_release_lock(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1556,6 +1831,7 @@ void yfs_fs_fetch_status(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1736,6 +2012,7 @@ void yfs_fs_inline_bulk_status(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &op->more_files[i].fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1898,6 +2175,7 @@ void yfs_fs_fetch_opaque_acl(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
@@ -1948,6 +2226,7 @@ void yfs_fs_store_opaque_acl2(struct afs_operation *op)
bp += size / sizeof(__be32);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}