summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/afs/Makefile2
-rw-r--r--fs/afs/addr_list.c224
-rw-r--r--fs/afs/addr_prefs.c531
-rw-r--r--fs/afs/afs.h3
-rw-r--r--fs/afs/callback.c141
-rw-r--r--fs/afs/cell.c5
-rw-r--r--fs/afs/cmservice.c5
-rw-r--r--fs/afs/dir.c66
-rw-r--r--fs/afs/dir_silly.c2
-rw-r--r--fs/afs/file.c20
-rw-r--r--fs/afs/fs_operation.c85
-rw-r--r--fs/afs/fs_probe.c323
-rw-r--r--fs/afs/fsclient.c74
-rw-r--r--fs/afs/inode.c204
-rw-r--r--fs/afs/internal.h370
-rw-r--r--fs/afs/main.c1
-rw-r--r--fs/afs/misc.c10
-rw-r--r--fs/afs/proc.c102
-rw-r--r--fs/afs/rotate.c520
-rw-r--r--fs/afs/rxrpc.c107
-rw-r--r--fs/afs/server.c135
-rw-r--r--fs/afs/server_list.c174
-rw-r--r--fs/afs/super.c7
-rw-r--r--fs/afs/validation.c473
-rw-r--r--fs/afs/vl_alias.c69
-rw-r--r--fs/afs/vl_list.c29
-rw-r--r--fs/afs/vl_probe.c60
-rw-r--r--fs/afs/vl_rotate.c215
-rw-r--r--fs/afs/vlclient.c143
-rw-r--r--fs/afs/volume.c61
-rw-r--r--fs/afs/write.c6
-rw-r--r--fs/afs/yfsclient.c25
-rw-r--r--include/net/af_rxrpc.h15
-rw-r--r--include/trace/events/afs.h779
-rw-r--r--include/trace/events/rxrpc.h3
-rw-r--r--net/rxrpc/af_rxrpc.c62
-rw-r--r--net/rxrpc/ar-internal.h6
-rw-r--r--net/rxrpc/call_object.c17
-rw-r--r--net/rxrpc/conn_client.c10
-rw-r--r--net/rxrpc/conn_service.c3
-rw-r--r--net/rxrpc/net_ns.c4
-rw-r--r--net/rxrpc/peer_object.c58
-rw-r--r--net/rxrpc/proc.c76
-rw-r--r--net/rxrpc/sendmsg.c11
44 files changed, 3544 insertions, 1692 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index e8956b65d7ff..dcdc0f1bb76f 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -5,6 +5,7 @@
kafs-y := \
addr_list.o \
+ addr_prefs.o \
callback.o \
cell.o \
cmservice.o \
@@ -27,6 +28,7 @@ kafs-y := \
server.o \
server_list.o \
super.o \
+ validation.o \
vlclient.o \
vl_alias.o \
vl_list.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index de1ae0bead3b..6d42f85c6be5 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -13,26 +13,55 @@
#include "internal.h"
#include "afs_fs.h"
+/*
+ * Dispose of an address list once it is no longer referenced (queued through
+ * call_rcu()): drop the peer reference held by each populated slot, emit the
+ * "free" tracepoint and release the memory.
+ */
+static void afs_free_addrlist(struct rcu_head *rcu)
+{
+	struct afs_addr_list *alist;
+	unsigned int slot = 0;
+
+	alist = container_of(rcu, struct afs_addr_list, rcu);
+
+	while (slot < alist->nr_addrs) {
+		rxrpc_kernel_put_peer(alist->addrs[slot].peer);
+		slot++;
+	}
+
+	trace_afs_alist(alist->debug_id, refcount_read(&alist->usage),
+			afs_alist_trace_free);
+	kfree(alist);
+}
+
/*
* Release an address list.
*/
-void afs_put_addrlist(struct afs_addr_list *alist)
+void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
+{
+ unsigned int debug_id;
+ bool dead;
+ int r;
+
+ if (!alist)
+ return;
+ debug_id = alist->debug_id;
+ dead = __refcount_dec_and_test(&alist->usage, &r);
+ trace_afs_alist(debug_id, r - 1, reason);
+ if (dead)
+ call_rcu(&alist->rcu, afs_free_addrlist);
+}
+
+struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
{
- if (alist && refcount_dec_and_test(&alist->usage))
- kfree_rcu(alist, rcu);
+ int r;
+
+ if (alist) {
+ __refcount_inc(&alist->usage, &r);
+ trace_afs_alist(alist->debug_id, r + 1, reason);
+ }
+ return alist;
}
/*
* Allocate an address list.
*/
-struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
- unsigned short service,
- unsigned short port)
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr)
{
struct afs_addr_list *alist;
- unsigned int i;
+ static atomic_t debug_id;
- _enter("%u,%u,%u", nr, service, port);
+ _enter("%u", nr);
if (nr > AFS_MAX_ADDRESSES)
nr = AFS_MAX_ADDRESSES;
@@ -43,17 +72,8 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
refcount_set(&alist->usage, 1);
alist->max_addrs = nr;
-
- for (i = 0; i < nr; i++) {
- struct sockaddr_rxrpc *srx = &alist->addrs[i];
- srx->srx_family = AF_RXRPC;
- srx->srx_service = service;
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin6);
- srx->transport.sin6.sin6_family = AF_INET6;
- srx->transport.sin6.sin6_port = htons(port);
- }
-
+ alist->debug_id = atomic_inc_return(&debug_id);
+ trace_afs_alist(alist->debug_id, 1, afs_alist_trace_alloc);
return alist;
}
@@ -126,7 +146,7 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
if (!vllist->servers[0].server)
goto error_vl;
- alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+ alist = afs_alloc_addrlist(nr);
if (!alist)
goto error;
@@ -197,9 +217,11 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
}
if (family == AF_INET)
- afs_merge_fs_addr4(alist, x[0], xport);
+ ret = afs_merge_fs_addr4(net, alist, x[0], xport);
else
- afs_merge_fs_addr6(alist, x, xport);
+ ret = afs_merge_fs_addr6(net, alist, x, xport);
+ if (ret < 0)
+ goto error;
} while (p < end);
@@ -216,26 +238,13 @@ bad_address:
problem, p - text, (int)len, (int)len, text);
ret = -EINVAL;
error:
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
error_vl:
afs_put_vlserverlist(net, vllist);
return ERR_PTR(ret);
}
/*
- * Compare old and new address lists to see if there's been any change.
- * - How to do this in better than O(Nlog(N)) time?
- * - We don't really want to sort the address list, but would rather take the
- * list as we got it so as not to undo record rotation by the DNS server.
- */
-#if 0
-static int afs_cmp_addr_list(const struct afs_addr_list *a1,
- const struct afs_addr_list *a2)
-{
-}
-#endif
-
-/*
* Perform a DNS query for VL servers and build a up an address list.
*/
struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
@@ -271,25 +280,33 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
/*
* Merge an IPv4 entry into a fileserver address list.
*/
-void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
+int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist,
+ __be32 xdr, u16 port)
{
- struct sockaddr_rxrpc *srx;
- u32 addr = ntohl(xdr);
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
int i;
if (alist->nr_addrs >= alist->max_addrs)
- return;
+ return 0;
- for (i = 0; i < alist->nr_ipv4; i++) {
- struct sockaddr_in *a = &alist->addrs[i].transport.sin;
- u32 a_addr = ntohl(a->sin_addr.s_addr);
- u16 a_port = ntohs(a->sin_port);
+ srx.srx_family = AF_RXRPC;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin);
+ srx.transport.sin.sin_family = AF_INET;
+ srx.transport.sin.sin_port = htons(port);
+ srx.transport.sin.sin_addr.s_addr = xdr;
- if (addr == a_addr && port == a_port)
- return;
- if (addr == a_addr && port < a_port)
- break;
- if (addr < a_addr)
+ peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
+ if (!peer)
+ return -ENOMEM;
+
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ if (peer == alist->addrs[i].peer) {
+ rxrpc_kernel_put_peer(peer);
+ return 0;
+ }
+ if (peer <= alist->addrs[i].peer)
break;
}
@@ -298,38 +315,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
alist->addrs + i,
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
- srx = &alist->addrs[i];
- srx->srx_family = AF_RXRPC;
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.sin.sin_family = AF_INET;
- srx->transport.sin.sin_port = htons(port);
- srx->transport.sin.sin_addr.s_addr = xdr;
+ alist->addrs[i].peer = peer;
alist->nr_ipv4++;
alist->nr_addrs++;
+ return 0;
}
/*
* Merge an IPv6 entry into a fileserver address list.
*/
-void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
+int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist,
+ __be32 *xdr, u16 port)
{
- struct sockaddr_rxrpc *srx;
- int i, diff;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
+ int i;
if (alist->nr_addrs >= alist->max_addrs)
- return;
+ return 0;
- for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
- struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
- u16 a_port = ntohs(a->sin6_port);
+ srx.srx_family = AF_RXRPC;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(port);
+ memcpy(&srx.transport.sin6.sin6_addr, xdr, 16);
- diff = memcmp(xdr, &a->sin6_addr, 16);
- if (diff == 0 && port == a_port)
- return;
- if (diff == 0 && port < a_port)
- break;
- if (diff < 0)
+ peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
+ if (!peer)
+ return -ENOMEM;
+
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+ if (peer == alist->addrs[i].peer) {
+ rxrpc_kernel_put_peer(peer);
+ return 0;
+ }
+ if (peer <= alist->addrs[i].peer)
break;
}
@@ -337,68 +358,7 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
memmove(alist->addrs + i + 1,
alist->addrs + i,
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
-
- srx = &alist->addrs[i];
- srx->srx_family = AF_RXRPC;
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin6);
- srx->transport.sin6.sin6_family = AF_INET6;
- srx->transport.sin6.sin6_port = htons(port);
- memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
+ alist->addrs[i].peer = peer;
alist->nr_addrs++;
-}
-
-/*
- * Get an address to try.
- */
-bool afs_iterate_addresses(struct afs_addr_cursor *ac)
-{
- unsigned long set, failed;
- int index;
-
- if (!ac->alist)
- return false;
-
- set = ac->alist->responded;
- failed = ac->alist->failed;
- _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
-
- ac->nr_iterations++;
-
- set &= ~(failed | ac->tried);
-
- if (!set)
- return false;
-
- index = READ_ONCE(ac->alist->preferred);
- if (test_bit(index, &set))
- goto selected;
-
- index = __ffs(set);
-
-selected:
- ac->index = index;
- set_bit(index, &ac->tried);
- ac->responded = false;
- return true;
-}
-
-/*
- * Release an address list cursor.
- */
-int afs_end_cursor(struct afs_addr_cursor *ac)
-{
- struct afs_addr_list *alist;
-
- alist = ac->alist;
- if (alist) {
- if (ac->responded &&
- ac->index != alist->preferred &&
- test_bit(ac->alist->preferred, &ac->tried))
- WRITE_ONCE(alist->preferred, ac->index);
- afs_put_addrlist(alist);
- ac->alist = NULL;
- }
-
- return ac->error;
+ return 0;
}
diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c
new file mode 100644
index 000000000000..a189ff8a5034
--- /dev/null
+++ b/fs/afs/addr_prefs.c
@@ -0,0 +1,531 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Address preferences management
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": addr_prefs: " fmt
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/seq_file.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+/*
+ * Resolve the afs_net for a seq_file by passing the result of
+ * seq_file_single_net() to afs_net().
+ */
+static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
+{
+ return afs_net(seq_file_single_net(m));
+}
+
+/*
+ * Split a NUL-terminated string up to the first newline around spaces.  The
+ * source string will be modified to have NUL-terminations inserted.
+ *
+ * @pbuf:    On entry, points to the start of the text; on a successful
+ *           return, it is advanced to where scanning stopped.
+ * @strv:    Filled in with a pointer to the start of each word, followed by a
+ *           NULL pointer.
+ * @maxstrv: The number of slots in @strv, including one for the NULL.
+ *
+ * Returns the number of words found, or -EINVAL (leaving *@pbuf untouched) if
+ * there are more words than @strv has room for.
+ *
+ * NOTE(review): a newline met whilst skipping spaces only ends the
+ * space-skipping loop, so words on the following line get appended to the
+ * current set - confirm whether that is intended.
+ */
+static int afs_split_string(char **pbuf, char *strv[], unsigned int maxstrv)
+{
+	unsigned int count = 0;
+	char *p = *pbuf;
+
+	maxstrv--; /* Allow for terminal NULL */
+	for (;;) {
+		/* Skip over spaces */
+		while (isspace(*p)) {
+			if (*p == '\n') {
+				p++;
+				break;
+			}
+			p++;
+		}
+		if (!*p)
+			break;
+
+		/* Mark start of word */
+		if (count >= maxstrv) {
+			pr_warn("Too many elements in string\n");
+			return -EINVAL;
+		}
+		strv[count++] = p;
+
+		/* Skip over word.  isspace() is false for NUL, so the
+		 * terminator must be tested for explicitly or the scan runs
+		 * off the end of the buffer when the last word isn't followed
+		 * by whitespace.
+		 */
+		while (*p && !isspace(*p))
+			p++;
+		if (!*p)
+			break;
+
+		/* Mark end of word */
+		if (*p == '\n') {
+			*p++ = 0;
+			break;
+		}
+		*p++ = 0;
+	}
+
+	*pbuf = p;
+	strv[count] = NULL;
+	return count;
+}
+
+/*
+ * Parse an address with an optional subnet mask.
+ *
+ * The text may be "<addr>" or "[<addr>]", optionally followed by "/<bits>",
+ * where <addr> is anything in4_pton() or in6_pton() accepts.  If no mask is
+ * given, the full width of the address is used (32 bits for IPv4, 128 for
+ * IPv6).  A mask of 0 or one wider than the address is rejected.
+ *
+ * On success, the family, address and subnet_mask of @pref are filled in and
+ * 0 is returned; otherwise a warning is logged and -EINVAL is returned.
+ */
+static int afs_parse_address(char *p, struct afs_addr_preference *pref)
+{
+	const char *stop;
+	unsigned long mask, tmp;
+	char *end = p + strlen(p);
+	bool bracket = false;
+
+	if (*p == '[') {
+		p++;
+		bracket = true;
+	}
+
+	/* Try IPv4 first, then IPv6.  Parsing resumes from stop below. */
+	if (in4_pton(p, end - p, (u8 *)&pref->ipv4_addr, -1, &stop)) {
+		pref->family = AF_INET;
+		mask = 32;
+	} else if (in6_pton(p, end - p, (u8 *)&pref->ipv6_addr, -1, &stop)) {
+		pref->family = AF_INET6;
+		mask = 128;
+	} else {
+		pr_warn("Can't determine address family\n");
+		return -EINVAL;
+	}
+
+	p = (char *)stop;
+	if (bracket) {
+		if (*p != ']') {
+			pr_warn("Can't find closing ']'\n");
+			return -EINVAL;
+		}
+		p++;
+	}
+
+	/* Optional "/<bits>"; mask currently holds the family's full width. */
+	if (*p == '/') {
+		p++;
+		tmp = simple_strtoul(p, &p, 10);
+		if (tmp > mask) {
+			pr_warn("Subnet mask too large\n");
+			return -EINVAL;
+		}
+		if (tmp == 0) {
+			pr_warn("Subnet mask too small\n");
+			return -EINVAL;
+		}
+		mask = tmp;
+	}
+
+	/* Anything left over is junk. */
+	if (*p) {
+		pr_warn("Invalid address\n");
+		return -EINVAL;
+	}
+
+	pref->subnet_mask = mask;
+	return 0;
+}
+
+/*
+ * Outcome of comparing a candidate address (a) against a listed one (b) in
+ * afs_cmp_address_pref().
+ */
+enum cmp_ret {
+ CONTINUE_SEARCH, /* a sorts after b, or b binds tighter than a */
+ INSERT_HERE, /* a sorts before b, or the families differ */
+ EXACT_MATCH, /* Equal under the shorter mask and the masks are the same */
+ SUBNET_MATCH, /* Equal under the shorter mask, but a binds tighter than b */
+};
+
+/*
+ * See if a candidate address matches a listed address.
+ *
+ * The two addresses are compared, most significant 32-bit word first, over
+ * the number of bits in the shorter of the two subnet masks.
+ *
+ * Returns:
+ *  INSERT_HERE      - the families differ or a sorts before b;
+ *  CONTINUE_SEARCH  - a sorts after b, or they are equal over the compared
+ *                     bits but b has the longer mask;
+ *  EXACT_MATCH      - equal over the compared bits with identical masks;
+ *  SUBNET_MATCH     - equal over the compared bits and a has the longer mask.
+ */
+static enum cmp_ret afs_cmp_address_pref(const struct afs_addr_preference *a,
+					 const struct afs_addr_preference *b)
+{
+	int subnet = min(a->subnet_mask, b->subnet_mask);
+	const __be32 *pa, *pb;
+	u32 mask, na, nb;
+
+	if (a->family != b->family)
+		return INSERT_HERE;
+
+	switch (a->family) {
+	case AF_INET6:
+		pa = a->ipv6_addr.s6_addr32;
+		pb = b->ipv6_addr.s6_addr32;
+		break;
+	case AF_INET:
+		pa = &a->ipv4_addr.s_addr;
+		pb = &b->ipv4_addr.s_addr;
+		break;
+	default:
+		/* afs_parse_address() only produces the two families above;
+		 * don't walk uninitialised pointers if something else turns
+		 * up.
+		 */
+		return INSERT_HERE;
+	}
+
+	/* Compare the words directly rather than subtracting them into an
+	 * int: the difference of two u32 values has the wrong sign once they
+	 * are 2^31 or more apart, which would corrupt the ordering.
+	 */
+	while (subnet > 32) {
+		na = ntohl(*pa);
+		nb = ntohl(*pb);
+		if (na < nb)
+			return INSERT_HERE; /* a<b */
+		if (na > nb)
+			return CONTINUE_SEARCH; /* a>b */
+		pa++;
+		pb++;
+		subnet -= 32;
+	}
+
+	if (subnet == 0)
+		return EXACT_MATCH;
+
+	/* Between 1 and 32 bits remain to be compared in the final word. */
+	mask = 0xffffffffU << (32 - subnet);
+	na = ntohl(*pa) & mask;
+	nb = ntohl(*pb) & mask;
+	if (na < nb)
+		return INSERT_HERE; /* a<b */
+	if (na > nb)
+		return CONTINUE_SEARCH; /* a>b */
+	if (a->subnet_mask == b->subnet_mask)
+		return EXACT_MATCH;
+	if (a->subnet_mask > b->subnet_mask)
+		return SUBNET_MATCH; /* a binds tighter than b */
+	return CONTINUE_SEARCH; /* b binds tighter than a */
+}
+
+/*
+ * Insert an address preference.
+ *
+ * @_preflist: Points to the list being edited.  If the list is full, a larger
+ *             copy is allocated, *@_preflist is switched to it and the list
+ *             it replaces is freed.
+ * @pref:      The entry to insert (copied).
+ * @index:     Slot to insert at; entries from there onwards move up by one.
+ *
+ * Returns 0 on success, -ENOSPC if the list already holds 255 entries or
+ * -ENOMEM if a larger list could not be allocated (in which case the list is
+ * left as it was).
+ */
+static int afs_insert_address_pref(struct afs_addr_preference_list **_preflist,
+				   struct afs_addr_preference *pref,
+				   int index)
+{
+	struct afs_addr_preference_list *preflist = *_preflist, *old = preflist;
+	size_t size, max_prefs;
+
+	_enter("{%u/%u/%u},%u", preflist->ipv6_off, preflist->nr, preflist->max_prefs, index);
+
+	if (preflist->nr == 255)
+		return -ENOSPC;
+	if (preflist->nr >= preflist->max_prefs) {
+		/* Grow the list: size it for one more entry, round the
+		 * allocation up to a power of two and make use of whatever
+		 * room that leaves (capped at 255 entries).
+		 */
+		max_prefs = preflist->max_prefs + 1;
+		size = struct_size(preflist, prefs, max_prefs);
+		size = roundup_pow_of_two(size);
+		max_prefs = min_t(size_t, (size - sizeof(*preflist)) / sizeof(*pref), 255);
+		preflist = kmalloc(size, GFP_KERNEL);
+		if (!preflist)
+			return -ENOMEM;
+		*preflist = **_preflist;
+		preflist->max_prefs = max_prefs;
+		*_preflist = preflist;
+
+		/* Copy the entries across, leaving a hole at index. */
+		if (index < preflist->nr)
+			memcpy(preflist->prefs + index + 1, old->prefs + index,
+			       sizeof(*pref) * (preflist->nr - index));
+		if (index > 0)
+			memcpy(preflist->prefs, old->prefs, sizeof(*pref) * index);
+
+		/* The superseded list would otherwise be leaked.  The caller
+		 * chain visible here (afs_proc_addr_prefs_write() via
+		 * afs_add_address_pref()) only ever hands in an unpublished
+		 * candidate list, so a plain kfree() suffices.
+		 */
+		kfree(old);
+	} else {
+		/* There's room: just open up a hole at index. */
+		if (index < preflist->nr)
+			memmove(preflist->prefs + index + 1, preflist->prefs + index,
+				sizeof(*pref) * (preflist->nr - index));
+	}
+
+	preflist->prefs[index] = *pref;
+	preflist->nr++;
+	if (pref->family == AF_INET)
+		preflist->ipv6_off++;
+	return 0;
+}
+
+/*
+ * Add an address preference.
+ * echo "add <proto> <IP>[/<mask>] <prior>" >/proc/fs/afs/addr_prefs
+ *
+ * The words following the "add" keyword are passed in @argv.  Only "udp" is
+ * accepted as the protocol.  If an entry with the same address and mask is
+ * already listed, only its priority is changed; otherwise a new entry is
+ * inserted at the position picked by afs_cmp_address_pref() within the part
+ * of the list that belongs to its address family.
+ */
+static int afs_add_address_pref(struct afs_net *net, struct afs_addr_preference_list **_preflist,
+				int argc, char **argv)
+{
+	struct afs_addr_preference_list *list = *_preflist;
+	struct afs_addr_preference new_pref;
+	int err, slot, limit;
+
+	if (argc != 3) {
+		pr_warn("Wrong number of params\n");
+		return -EINVAL;
+	}
+
+	if (strcmp(argv[0], "udp") != 0) {
+		pr_warn("Unsupported protocol\n");
+		return -EINVAL;
+	}
+
+	err = afs_parse_address(argv[1], &new_pref);
+	if (err < 0)
+		return err;
+
+	err = kstrtou16(argv[2], 10, &new_pref.prio);
+	if (err < 0) {
+		pr_warn("Invalid priority\n");
+		return err;
+	}
+
+	/* IPv4 entries are searched in [0, ipv6_off), IPv6 in [ipv6_off, nr). */
+	slot  = (new_pref.family == AF_INET) ? 0 : list->ipv6_off;
+	limit = (new_pref.family == AF_INET) ? list->ipv6_off : list->nr;
+
+	while (slot < limit) {
+		enum cmp_ret cmp = afs_cmp_address_pref(&new_pref, &list->prefs[slot]);
+
+		if (cmp == EXACT_MATCH) {
+			/* Already listed: just change the priority. */
+			list->prefs[slot].prio = new_pref.prio;
+			return 0;
+		}
+		if (cmp == INSERT_HERE || cmp == SUBNET_MATCH)
+			break;
+		slot++;
+	}
+
+	return afs_insert_address_pref(_preflist, &new_pref, slot);
+}
+
+/*
+ * Delete an address preference.
+ *
+ * Remove the entry at @index from the list, closing up the gap and adjusting
+ * the IPv6 offset if the entry sat below it.  Returns 0 on success or -ENOENT
+ * if the list is empty.
+ */
+static int afs_delete_address_pref(struct afs_addr_preference_list **_preflist,
+				   int index)
+{
+	struct afs_addr_preference_list *list = *_preflist;
+
+	_enter("{%u/%u/%u},%u", list->ipv6_off, list->nr, list->max_prefs, index);
+
+	if (!list->nr)
+		return -ENOENT;
+
+	/* Shuffle the following entries down over the one being removed. */
+	if (index < list->nr - 1)
+		memmove(&list->prefs[index], &list->prefs[index + 1],
+			sizeof(list->prefs[0]) * (list->nr - index - 1));
+
+	list->nr--;
+	if (index < list->ipv6_off)
+		list->ipv6_off--;
+	return 0;
+}
+
+/*
+ * Delete an address preference.
+ * echo "del <proto> <IP>[/<mask>]" >/proc/fs/afs/addr_prefs
+ *
+ * The words following the "del" keyword are passed in @argv.  Only "udp" is
+ * accepted as the protocol.  Returns the result of the deletion if an entry
+ * with the same address and mask is found, 0 if the search stops early
+ * without finding one and -ENOANO if the search runs off the end of the part
+ * of the list for the address family.
+ */
+static int afs_del_address_pref(struct afs_net *net, struct afs_addr_preference_list **_preflist,
+				int argc, char **argv)
+{
+	struct afs_addr_preference_list *list = *_preflist;
+	struct afs_addr_preference target;
+	enum cmp_ret cmp;
+	int err, slot, limit;
+
+	if (argc != 2) {
+		pr_warn("Wrong number of params\n");
+		return -EINVAL;
+	}
+
+	if (strcmp(argv[0], "udp") != 0) {
+		pr_warn("Unsupported protocol\n");
+		return -EINVAL;
+	}
+
+	err = afs_parse_address(argv[1], &target);
+	if (err < 0)
+		return err;
+
+	/* IPv4 entries are searched in [0, ipv6_off), IPv6 in [ipv6_off, nr). */
+	slot  = (target.family == AF_INET) ? 0 : list->ipv6_off;
+	limit = (target.family == AF_INET) ? list->ipv6_off : list->nr;
+
+	for (; slot < limit; slot++) {
+		cmp = afs_cmp_address_pref(&target, &list->prefs[slot]);
+		if (cmp == CONTINUE_SEARCH)
+			continue;
+		if (cmp == EXACT_MATCH)
+			return afs_delete_address_pref(_preflist, slot);
+		/* INSERT_HERE or SUBNET_MATCH: no identical entry is listed. */
+		return 0;
+	}
+
+	return -ENOANO;
+}
+
+/*
+ * Handle writes to /proc/fs/afs/addr_prefs
+ *
+ * @buf holds one or more "add ..." or "del ..." commands, which are applied
+ * in turn to a private copy of the current preference list.  Only if every
+ * command succeeds is the copy, with its version bumped, published in place
+ * of the old list, which is then freed by kfree_rcu().  The whole operation
+ * is done with the file's inode lock held.
+ *
+ * Returns 0 on success or a negative error code, in which case the published
+ * list is left unchanged.
+ */
+int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size)
+{
+	struct afs_addr_preference_list *preflist, *old;
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net_single(m);
+	size_t psize;
+	char *argv[5];
+	int ret, argc, max_prefs;
+
+	inode_lock(file_inode(file));
+
+	/* Allocate a candidate new list and initialise it from the old. */
+	old = rcu_dereference_protected(net->address_prefs,
+					lockdep_is_held(&file_inode(file)->i_rwsem));
+
+	if (old)
+		max_prefs = old->nr + 1;
+	else
+		max_prefs = 1;
+
+	/* Round the allocation up to a power of two and use all of the room
+	 * that provides, up to the limit of 255 entries.
+	 */
+	psize = struct_size(old, prefs, max_prefs);
+	psize = roundup_pow_of_two(psize);
+	max_prefs = min_t(size_t, (psize - sizeof(*old)) / sizeof(old->prefs[0]), 255);
+
+	ret = -ENOMEM;
+	preflist = kmalloc(struct_size(preflist, prefs, max_prefs), GFP_KERNEL);
+	if (!preflist)
+		goto done;
+
+	if (old)
+		memcpy(preflist, old, struct_size(preflist, prefs, old->nr));
+	else
+		memset(preflist, 0, sizeof(*preflist));
+	preflist->max_prefs = max_prefs;
+
+	do {
+		argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv));
+		if (argc < 0) {
+			/* Leave by the common exit so that the inode lock is
+			 * dropped and the candidate list is freed.
+			 */
+			ret = argc;
+			goto done;
+		}
+		if (argc < 2)
+			goto inval;
+
+		if (strcmp(argv[0], "add") == 0)
+			ret = afs_add_address_pref(net, &preflist, argc - 1, argv + 1);
+		else if (strcmp(argv[0], "del") == 0)
+			ret = afs_del_address_pref(net, &preflist, argc - 1, argv + 1);
+		else
+			goto inval;
+		if (ret < 0)
+			goto done;
+	} while (*buf);
+
+	preflist->version++;
+	rcu_assign_pointer(net->address_prefs, preflist);
+	/* Store prefs before version */
+	smp_store_release(&net->address_pref_version, preflist->version);
+	kfree_rcu(old, rcu);
+	preflist = NULL;
+	ret = 0;
+
+done:
+	kfree(preflist);
+	inode_unlock(file_inode(file));
+	_leave(" = %d", ret);
+	return ret;
+
+inval:
+	pr_warn("Invalid Command\n");
+	ret = -EINVAL;
+	goto done;
+}
+
+/*
+ * Mark the priorities on an address list if the address preferences table has
+ * changed. The caller must hold the RCU read lock.
+ *
+ * Each address in @alist is compared, as a full-width host entry, against the
+ * entries for the same family in the preference table, and the priority of a
+ * matching entry is copied to the address. Finally, the table version is
+ * recorded in @alist so that the work can be skipped next time.
+ */
+void afs_get_address_preferences_rcu(struct afs_net *net, struct afs_addr_list *alist)
+{
+ const struct afs_addr_preference_list *preflist =
+ rcu_dereference(net->address_prefs);
+ const struct sockaddr_in6 *sin6;
+ const struct sockaddr_in *sin;
+ const struct sockaddr *sa;
+ struct afs_addr_preference test;
+ enum cmp_ret cmp;
+ int i, j;
+
+ /* Nothing to do without preferences or addresses, or if this address
+ * list has already been marked up from this version of the table.
+ */
+ if (!preflist || !preflist->nr || !alist->nr_addrs ||
+ smp_load_acquire(&alist->addr_pref_version) == preflist->version)
+ return;
+
+ /* IPv4: address slots [0, nr_ipv4) against table slots [0, ipv6_off).
+ *
+ * NOTE(review): the break statements in the switch below leave only
+ * the switch, not the inner for loop, so every table entry is visited
+ * and the last matching one sets the priority - confirm that this is
+ * the intended behaviour.
+ */
+ test.family = AF_INET;
+ test.subnet_mask = 32;
+ test.prio = 0;
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ sa = rxrpc_kernel_remote_addr(alist->addrs[i].peer);
+ sin = (const struct sockaddr_in *)sa;
+ test.ipv4_addr = sin->sin_addr;
+ for (j = 0; j < preflist->ipv6_off; j++) {
+ cmp = afs_cmp_address_pref(&test, &preflist->prefs[j]);
+ switch (cmp) {
+ case CONTINUE_SEARCH:
+ continue;
+ case INSERT_HERE:
+ break;
+ case EXACT_MATCH:
+ case SUBNET_MATCH:
+ WRITE_ONCE(alist->addrs[i].prio, preflist->prefs[j].prio);
+ break;
+ }
+ }
+ }
+
+ /* IPv6: the remaining address slots (i carries on from the loop above)
+ * against table slots [ipv6_off, nr). The same note about break
+ * applies here.
+ */
+ test.family = AF_INET6;
+ test.subnet_mask = 128;
+ test.prio = 0;
+ for (; i < alist->nr_addrs; i++) {
+ sa = rxrpc_kernel_remote_addr(alist->addrs[i].peer);
+ sin6 = (const struct sockaddr_in6 *)sa;
+ test.ipv6_addr = sin6->sin6_addr;
+ for (j = preflist->ipv6_off; j < preflist->nr; j++) {
+ cmp = afs_cmp_address_pref(&test, &preflist->prefs[j]);
+ switch (cmp) {
+ case CONTINUE_SEARCH:
+ continue;
+ case INSERT_HERE:
+ break;
+ case EXACT_MATCH:
+ case SUBNET_MATCH:
+ WRITE_ONCE(alist->addrs[i].prio, preflist->prefs[j].prio);
+ break;
+ }
+ }
+ }
+
+ /* Record the table version only after the priorities are written. */
+ smp_store_release(&alist->addr_pref_version, preflist->version);
+}
+
+/*
+ * Mark the priorities on an address list if the address preferences table has
+ * changed.  Avoid taking the RCU read lock if we can.
+ *
+ * Nothing is done if no preference table is installed or if the version
+ * recorded in @alist equals the current table version.
+ */
+void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alist)
+{
+	if (!net->address_prefs)
+		return;
+
+	/* Load version before prefs */
+	if (smp_load_acquire(&net->address_pref_version) == alist->addr_pref_version)
+		return;
+
+	rcu_read_lock();
+	afs_get_address_preferences_rcu(net, alist);
+	rcu_read_unlock();
+}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 81815724db6c..b488072aee87 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -165,7 +165,8 @@ struct afs_status_cb {
* AFS volume synchronisation information
*/
struct afs_volsync {
- time64_t creation; /* volume creation time */
+ time64_t creation; /* Volume creation time (or TIME64_MIN) */
+ time64_t update; /* Volume update time (or TIME64_MIN) */
};
/*
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index a484fa642808..99b2c8172021 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -33,22 +33,20 @@ void afs_invalidate_mmap_work(struct work_struct *work)
unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
}
-void afs_server_init_callback_work(struct work_struct *work)
+static void afs_volume_init_callback(struct afs_volume *volume)
{
- struct afs_server *server = container_of(work, struct afs_server, initcb_work);
struct afs_vnode *vnode;
- struct afs_cell *cell = server->cell;
- down_read(&cell->fs_open_mmaps_lock);
+ down_read(&volume->open_mmaps_lock);
- list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) {
- if (vnode->cb_server == server) {
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
+ if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
+ atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
queue_work(system_unbound_wq, &vnode->cb_work);
}
}
- up_read(&cell->fs_open_mmaps_lock);
+ up_read(&volume->open_mmaps_lock);
}
/*
@@ -57,15 +55,20 @@ void afs_server_init_callback_work(struct work_struct *work)
*/
void afs_init_callback_state(struct afs_server *server)
{
- rcu_read_lock();
- do {
- server->cb_s_break++;
- atomic_inc(&server->cell->fs_s_break);
- if (!list_empty(&server->cell->fs_open_mmaps))
- queue_work(system_unbound_wq, &server->initcb_work);
+ struct afs_server_entry *se;
- } while ((server = rcu_dereference(server->uuid_next)));
- rcu_read_unlock();
+ down_read(&server->cell->vs_lock);
+
+ list_for_each_entry(se, &server->volumes, slink) {
+ se->cb_expires_at = AFS_NO_CB_PROMISE;
+ se->volume->cb_expires_at = AFS_NO_CB_PROMISE;
+ trace_afs_cb_v_break(se->volume->vid, atomic_read(&se->volume->cb_v_break),
+ afs_cb_break_for_s_reinit);
+ if (!list_empty(&se->volume->open_mmaps))
+ afs_volume_init_callback(se->volume);
+ }
+
+ up_read(&server->cell->vs_lock);
}
/*
@@ -76,9 +79,9 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
_enter("");
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) {
vnode->cb_break++;
- vnode->cb_v_break = vnode->volume->cb_v_break;
+ vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
afs_clear_permits(vnode);
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
@@ -110,13 +113,14 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
{
struct afs_volume *volume = NULL;
struct rb_node *p;
- int seq = 0;
+ int seq = 1;
- do {
+ for (;;) {
/* Unfortunately, rbtree walking doesn't give reliable results
* under just the RCU read lock, so we have to check for
* changes.
*/
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&cell->volume_lock, &seq);
p = rcu_dereference_raw(cell->volumes.rb_node);
@@ -132,35 +136,63 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
volume = NULL;
}
- } while (need_seqretry(&cell->volume_lock, seq));
+ if (volume && afs_try_get_volume(volume, afs_volume_trace_get_callback))
+ break;
+ if (!need_seqretry(&cell->volume_lock, seq))
+ break;
+ seq |= 1; /* Want a lock next time */
+ }
done_seqretry(&cell->volume_lock, seq);
return volume;
}
/*
+ * Allow the fileserver to break callbacks at the volume-level. This is
+ * typically done when, for example, a R/W volume is snapshotted to a R/O
+ * volume (the only way to change an R/O volume). It may also, however, happen
+ * when a volserver takes control of a volume (offlining it, moving it, etc.).
+ *
+ * Every file in that volume will need to be reevaluated.
+ */
+static void afs_break_volume_callback(struct afs_server *server,
+				      struct afs_volume *volume)
+	__releases(RCU)
+{
+	struct afs_server_list *slist;
+	unsigned int i, cb_v_break;
+
+	/* The caller passes in the result of its volume lookup unchecked, and
+	 * that may be NULL.  There's then nothing to break, but the RCU read
+	 * lock must still be dropped as the caller leaves that to us.
+	 */
+	if (!volume) {
+		rcu_read_unlock();
+		return;
+	}
+
+	slist = rcu_dereference(volume->servers);
+
+	write_lock(&volume->cb_v_break_lock);
+
+	/* Cancel the callback promise recorded against this server in the
+	 * volume's server list and the one recorded on the volume itself.
+	 */
+	for (i = 0; i < slist->nr_servers; i++)
+		if (slist->servers[i].server == server)
+			slist->servers[i].cb_expires_at = AFS_NO_CB_PROMISE;
+	volume->cb_expires_at = AFS_NO_CB_PROMISE;
+
+	cb_v_break = atomic_inc_return_release(&volume->cb_v_break);
+	trace_afs_cb_v_break(volume->vid, cb_v_break, afs_cb_break_for_volume_callback);
+
+	write_unlock(&volume->cb_v_break_lock);
+	rcu_read_unlock();
+
+	/* afs_volume_init_callback() takes a rwsem, so it is only called once
+	 * the spinning lock and the RCU read lock have been released.
+	 */
+	if (!list_empty(&volume->open_mmaps))
+		afs_volume_init_callback(volume);
+}
+
+/*
* allow the fileserver to explicitly break one callback
* - happens when
* - the backing file is changed
* - a lock is released
*/
-static void afs_break_one_callback(struct afs_volume *volume,
+static void afs_break_one_callback(struct afs_server *server,
+ struct afs_volume *volume,
struct afs_fid *fid)
{
struct super_block *sb;
struct afs_vnode *vnode;
struct inode *inode;
- if (fid->vnode == 0 && fid->unique == 0) {
- /* The callback break applies to an entire volume. */
- write_lock(&volume->cb_v_break_lock);
- volume->cb_v_break++;
- trace_afs_cb_break(fid, volume->cb_v_break,
- afs_cb_break_for_volume_callback, false);
- write_unlock(&volume->cb_v_break_lock);
- return;
- }
-
/* See if we can find a matching inode - even an I_NEW inode needs to
* be marked as it can have its callback broken before we finish
* setting up the local inode.
@@ -187,25 +219,35 @@ static void afs_break_some_callbacks(struct afs_server *server,
afs_volid_t vid = cbb->fid.vid;
size_t i;
+ rcu_read_lock();
volume = afs_lookup_volume_rcu(server->cell, vid);
+ if (cbb->fid.vnode == 0 && cbb->fid.unique == 0) {
+ afs_break_volume_callback(server, volume);
+ *_count -= 1;
+ if (*_count)
+ memmove(cbb, cbb + 1, sizeof(*cbb) * *_count);
+ } else {
+ /* TODO: Find all matching volumes if we couldn't match the server and
+ * break them anyway.
+ */
- /* TODO: Find all matching volumes if we couldn't match the server and
- * break them anyway.
- */
-
- for (i = *_count; i > 0; cbb++, i--) {
- if (cbb->fid.vid == vid) {
- _debug("- Fid { vl=%08llx n=%llu u=%u }",
- cbb->fid.vid,
- cbb->fid.vnode,
- cbb->fid.unique);
- --*_count;
- if (volume)
- afs_break_one_callback(volume, &cbb->fid);
- } else {
- *residue++ = *cbb;
+ for (i = *_count; i > 0; cbb++, i--) {
+ if (cbb->fid.vid == vid) {
+ _debug("- Fid { vl=%08llx n=%llu u=%u }",
+ cbb->fid.vid,
+ cbb->fid.vnode,
+ cbb->fid.unique);
+ --*_count;
+ if (volume)
+ afs_break_one_callback(server, volume, &cbb->fid);
+ } else {
+ *residue++ = *cbb;
+ }
}
+ rcu_read_unlock();
}
+
+ afs_put_volume(volume, afs_volume_trace_put_callback);
}
/*
@@ -218,11 +260,6 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
ASSERT(server != NULL);
- rcu_read_lock();
-
while (count > 0)
afs_break_some_callbacks(server, callbacks, &count);
-
- rcu_read_unlock();
- return;
}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 926cb1188eba..caa09875f520 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -161,13 +161,12 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
refcount_set(&cell->ref, 1);
atomic_set(&cell->active, 0);
INIT_WORK(&cell->manager, afs_manage_cell_work);
+ init_rwsem(&cell->vs_lock);
cell->volumes = RB_ROOT;
INIT_HLIST_HEAD(&cell->proc_volumes);
seqlock_init(&cell->volume_lock);
cell->fs_servers = RB_ROOT;
seqlock_init(&cell->fs_lock);
- INIT_LIST_HEAD(&cell->fs_open_mmaps);
- init_rwsem(&cell->fs_open_mmaps_lock);
rwlock_init(&cell->vl_servers_lock);
cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
@@ -817,7 +816,7 @@ done:
final_destruction:
/* The root volume is pinning the cell */
- afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root);
+ afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root);
cell->root_volume = NULL;
afs_put_cell(cell, afs_cell_trace_put_destroy);
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index d4ddb20d6732..99a3f20bc786 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -146,10 +146,11 @@ static int afs_find_cm_server_by_peer(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
struct afs_server *server;
+ struct rxrpc_peer *peer;
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+ peer = rxrpc_kernel_get_call_peer(call->net->socket, call->rxcall);
- server = afs_find_server(call->net, &srx);
+ server = afs_find_server(call->net, peer);
if (!server) {
trace_afs_cm_no_server(call, &srx);
return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5219182e52e1..c14533ef108f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -693,8 +693,9 @@ static void afs_do_lookup_success(struct afs_operation *op)
vp = &op->file[0];
abort_code = vp->scb.status.abort_code;
if (abort_code != 0) {
- op->ac.abort_code = abort_code;
- op->error = afs_abort_to_error(abort_code);
+ op->call_abort_code = abort_code;
+ afs_op_set_error(op, afs_abort_to_error(abort_code));
+ op->cumul_error.abort_code = abort_code;
}
break;
@@ -806,8 +807,8 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->fids[i].vid = dvnode->fid.vid;
cookie->ctx.actor = afs_lookup_filldir;
cookie->name = dentry->d_name;
- cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want
- * and slot 1 for the directory */
+ cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want
+ * and slot 0 for the directory */
if (!afs_server_supports_ibulk(dvnode))
cookie->one_only = true;
@@ -846,13 +847,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
_debug("nr_files %u", op->nr_files);
/* Need space for examining all the selected files */
- op->error = -ENOMEM;
if (op->nr_files > 2) {
op->more_files = kvcalloc(op->nr_files - 2,
sizeof(struct afs_vnode_param),
GFP_KERNEL);
- if (!op->more_files)
+ if (!op->more_files) {
+ afs_op_nomem(op);
goto out_op;
+ }
for (i = 2; i < op->nr_files; i++) {
vp = &op->more_files[i - 2];
@@ -878,14 +880,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
* lookups contained therein are stored in the reply without aborting
* the whole operation.
*/
- op->error = -ENOTSUPP;
+ afs_op_set_error(op, -ENOTSUPP);
if (!cookie->one_only) {
op->ops = &afs_inline_bulk_status_operation;
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
- if (op->error == -ENOTSUPP) {
+ if (afs_op_error(op) == -ENOTSUPP) {
/* We could try FS.BulkStatus next, but this aborts the entire
* op if any of the lookups fails - so, for the moment, revert
* to FS.FetchStatus for op->file[1].
@@ -895,10 +897,10 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
- inode = ERR_PTR(op->error);
+ inode = ERR_PTR(afs_op_error(op));
out_op:
- if (op->error == 0) {
+ if (!afs_op_error(op)) {
inode = &op->file[1].vnode->netfs.inode;
op->file[1].vnode = NULL;
}
@@ -1116,7 +1118,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
dir = AFS_FS_I(d_inode(parent));
/* validate the parent directory */
- afs_validate(dir, key);
+ ret = afs_validate(dir, key);
+ if (ret == -ERESTARTSYS) {
+ dput(parent);
+ key_put(key);
+ return ret;
+ }
if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
_debug("%pd: parent dir deleted", dentry);
@@ -1255,9 +1262,10 @@ void afs_check_for_remote_deletion(struct afs_operation *op)
{
struct afs_vnode *vnode = op->file[0].vnode;
- switch (op->ac.abort_code) {
+ switch (afs_op_abort_code(op)) {
case VNOVNODE:
set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_nlink(&vnode->netfs.inode);
afs_break_callback(vnode, afs_cb_break_for_deleted);
}
}
@@ -1273,20 +1281,20 @@ static void afs_vnode_new_inode(struct afs_operation *op)
_enter("");
- ASSERTCMP(op->error, ==, 0);
+ ASSERTCMP(afs_op_error(op), ==, 0);
inode = afs_iget(op, vp);
if (IS_ERR(inode)) {
/* ENOMEM or EINTR at a really inconvenient time - just abandon
* the new directory on the server.
*/
- op->error = PTR_ERR(inode);
+ afs_op_accumulate_error(op, PTR_ERR(inode), 0);
return;
}
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- if (!op->error)
+ if (!afs_op_error(op))
afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
d_instantiate(op->dentry, inode);
}
@@ -1320,7 +1328,7 @@ static void afs_create_put(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- if (op->error)
+ if (afs_op_error(op))
d_drop(op->dentry);
}
@@ -1373,7 +1381,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
clear_nlink(&vnode->netfs.inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
}
}
@@ -1480,7 +1488,7 @@ static void afs_dir_remove_link(struct afs_operation *op)
struct dentry *dentry = op->dentry;
int ret;
- if (op->error != 0 ||
+ if (afs_op_error(op) ||
(op->file[1].scb.have_status && op->file[1].scb.have_error))
return;
if (d_really_is_positive(dentry))
@@ -1504,10 +1512,10 @@ static void afs_dir_remove_link(struct afs_operation *op)
ret = afs_validate(vnode, op->key);
if (ret != -ESTALE)
- op->error = ret;
+ afs_op_set_error(op, ret);
}
- _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, op->error);
+ _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, afs_op_error(op));
}
static void afs_unlink_success(struct afs_operation *op)
@@ -1538,7 +1546,7 @@ static void afs_unlink_edit_dir(struct afs_operation *op)
static void afs_unlink_put(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- if (op->unlink.need_rehash && op->error < 0 && op->error != -ENOENT)
+ if (op->unlink.need_rehash && afs_op_error(op) < 0 && afs_op_error(op) != -ENOENT) /* rehash on failure, except -ENOENT */
d_rehash(op->dentry);
}
@@ -1579,7 +1587,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
/* Try to make sure we have a callback promise on the victim. */
ret = afs_validate(vnode, op->key);
if (ret < 0) {
- op->error = ret;
+ afs_op_set_error(op, ret);
goto error;
}
@@ -1588,7 +1596,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
- op->error = afs_sillyrename(dvnode, vnode, dentry, op->key);
+ afs_op_set_error(op, afs_sillyrename(dvnode, vnode, dentry, op->key));
goto error;
}
if (!d_unhashed(dentry)) {
@@ -1609,7 +1617,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
/* If there was a conflict with a third party, check the status of the
* unlinked vnode.
*/
- if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ if (afs_op_error(op) == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
op->file[1].update_ctime = false;
op->fetch_status.which = 1;
op->ops = &afs_fetch_status_operation;
@@ -1691,7 +1699,7 @@ static void afs_link_success(struct afs_operation *op)
static void afs_link_put(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- if (op->error)
+ if (afs_op_error(op)) /* op ended with a nonzero error: unhash the dentry */
d_drop(op->dentry);
}
@@ -1889,7 +1897,7 @@ static void afs_rename_put(struct afs_operation *op)
if (op->rename.rehash)
d_rehash(op->rename.rehash);
dput(op->rename.tmp);
- if (op->error)
+ if (afs_op_error(op))
d_rehash(op->dentry);
}
@@ -1934,7 +1942,7 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return PTR_ERR(op);
ret = afs_validate(vnode, op->key);
- op->error = ret;
+ afs_op_set_error(op, ret);
if (ret < 0)
goto error;
@@ -1971,7 +1979,7 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
op->rename.tmp = d_alloc(new_dentry->d_parent,
&new_dentry->d_name);
if (!op->rename.tmp) {
- op->error = -ENOMEM;
+ afs_op_nomem(op);
goto error;
}
@@ -1979,7 +1987,7 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
AFS_FS_I(d_inode(new_dentry)),
new_dentry, op->key);
if (ret) {
- op->error = ret;
+ afs_op_set_error(op, ret);
goto error;
}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index bb5807e87fa4..a1e581946b93 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -218,7 +218,7 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
/* If there was a conflict with a third party, check the status of the
* unlinked vnode.
*/
- if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ if (op->cumul_error.error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
op->file[1].update_ctime = false;
op->fetch_status.which = 1;
op->ops = &afs_fetch_status_operation;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index d37dd201752b..30914e0d9cb2 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -243,12 +243,9 @@ static void afs_fetch_data_notify(struct afs_operation *op)
{
struct afs_read *req = op->fetch.req;
struct netfs_io_subrequest *subreq = req->subreq;
- int error = op->error;
+ int error = afs_op_error(op);
- if (error == -ECONNABORTED)
- error = afs_abort_to_error(op->ac.abort_code);
req->error = error;
-
if (subreq) {
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
@@ -271,7 +268,7 @@ static void afs_fetch_data_success(struct afs_operation *op)
static void afs_fetch_data_put(struct afs_operation *op)
{
- op->fetch.req->error = op->error;
+ op->fetch.req->error = afs_op_error(op); /* copy the op's error into the read request before dropping it */
afs_put_read(op->fetch.req);
}
@@ -517,13 +514,12 @@ static bool afs_release_folio(struct folio *folio, gfp_t gfp)
static void afs_add_open_mmap(struct afs_vnode *vnode)
{
if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
- down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ down_write(&vnode->volume->open_mmaps_lock); /* lock is now per-volume rather than per-cell */
if (list_empty(&vnode->cb_mmap_link))
- list_add_tail(&vnode->cb_mmap_link,
- &vnode->volume->cell->fs_open_mmaps);
+ list_add_tail(&vnode->cb_mmap_link, &vnode->volume->open_mmaps); /* vnode joins the volume's open-mmap list */
- up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ up_write(&vnode->volume->open_mmaps_lock); /* pairs with the down_write() above */
}
}
@@ -532,12 +528,12 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode)
if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
return;
- down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ down_write(&vnode->volume->open_mmaps_lock);
if (atomic_read(&vnode->cb_nr_mmap) == 0)
list_del_init(&vnode->cb_mmap_link);
- up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ up_write(&vnode->volume->open_mmaps_lock);
flush_work(&vnode->cb_work);
}
@@ -573,7 +569,7 @@ static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pg
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
- if (afs_pagecache_valid(vnode))
+ if (afs_check_validity(vnode))
return filemap_map_pages(vmf, start_pgoff, end_pgoff);
return 0;
}
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 7a3803ce3a22..3546b087e791 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -35,13 +35,15 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo
key_get(key);
}
- op->key = key;
- op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op);
- op->net = volume->cell->net;
- op->cb_v_break = volume->cb_v_break;
- op->debug_id = atomic_inc_return(&afs_operation_debug_counter);
- op->error = -EDESTADDRREQ;
- op->ac.error = SHRT_MAX;
+ op->key = key;
+ op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op);
+ op->net = volume->cell->net;
+ op->cb_v_break = atomic_read(&volume->cb_v_break);
+ op->pre_volsync.creation = volume->creation_time;
+ op->pre_volsync.update = volume->update_time;
+ op->debug_id = atomic_inc_return(&afs_operation_debug_counter);
+ op->nr_iterations = -1;
+ afs_op_set_error(op, -EDESTADDRREQ);
_leave(" = [op=%08x]", op->debug_id);
return op;
@@ -71,7 +73,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
swap(vnode, vnode2);
if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
- op->error = -ERESTARTSYS;
+ afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
_leave(" = f [I 0]");
return false;
@@ -80,7 +82,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
if (vnode2) {
if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
- op->error = -ERESTARTSYS;
+ afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
mutex_unlock(&vnode->io_lock);
op->flags &= ~AFS_OPERATION_LOCK_0;
@@ -147,7 +149,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
afs_prepare_vnode(op, &op->file[0], 0);
afs_prepare_vnode(op, &op->file[1], 1);
- op->cb_v_break = op->volume->cb_v_break;
+ op->cb_v_break = atomic_read(&op->volume->cb_v_break);
_leave(" = true");
return true;
}
@@ -159,16 +161,16 @@ static void afs_end_vnode_operation(struct afs_operation *op)
{
_enter("");
- if (op->error == -EDESTADDRREQ ||
- op->error == -EADDRNOTAVAIL ||
- op->error == -ENETUNREACH ||
- op->error == -EHOSTUNREACH)
+ switch (afs_op_error(op)) {
+ case -EDESTADDRREQ:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
afs_dump_edestaddrreq(op);
+ break;
+ }
afs_drop_io_locks(op);
-
- if (op->error == -ECONNABORTED)
- op->error = afs_abort_to_error(op->ac.abort_code);
}
/*
@@ -179,37 +181,43 @@ void afs_wait_for_operation(struct afs_operation *op)
_enter("");
while (afs_select_fileserver(op)) {
- op->cb_s_break = op->server->cb_s_break;
+ op->call_responded = false;
+ op->call_error = 0;
+ op->call_abort_code = 0;
if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
op->ops->issue_yfs_rpc)
op->ops->issue_yfs_rpc(op);
else if (op->ops->issue_afs_rpc)
op->ops->issue_afs_rpc(op);
else
- op->ac.error = -ENOTSUPP;
-
- if (op->call)
- op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
+ op->call_error = -ENOTSUPP;
+
+ if (op->call) {
+ afs_wait_for_call_to_complete(op->call);
+ op->call_abort_code = op->call->abort_code;
+ op->call_error = op->call->error;
+ op->call_responded = op->call->responded;
+ afs_put_call(op->call);
+ }
}
- switch (op->error) {
- case 0:
+ if (op->call_responded)
+ set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+ if (!afs_op_error(op)) {
_debug("success");
op->ops->success(op);
- break;
- case -ECONNABORTED:
+ } else if (op->cumul_error.aborted) {
if (op->ops->aborted)
op->ops->aborted(op);
- fallthrough;
- default:
+ } else {
if (op->ops->failed)
op->ops->failed(op);
- break;
}
afs_end_vnode_operation(op);
- if (op->error == 0 && op->ops->edit_dir) {
+ if (!afs_op_error(op) && op->ops->edit_dir) {
_debug("edit_dir");
op->ops->edit_dir(op);
}
@@ -221,7 +229,8 @@ void afs_wait_for_operation(struct afs_operation *op)
*/
int afs_put_operation(struct afs_operation *op)
{
- int i, ret = op->error;
+ struct afs_addr_list *alist;
+ int i, ret = afs_op_error(op);
_enter("op=%08x,%d", op->debug_id, ret);
@@ -243,9 +252,19 @@ int afs_put_operation(struct afs_operation *op)
kfree(op->more_files);
}
- afs_end_cursor(&op->ac);
+ if (op->estate) {
+ alist = op->estate->addresses;
+ if (alist) {
+ if (op->call_responded &&
+ op->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &op->addr_tried))
+ WRITE_ONCE(alist->preferred, op->addr_index);
+ }
+ }
+
+ afs_clear_server_states(op);
afs_put_serverlist(op->net, op->server_list);
- afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op);
+ afs_put_volume(op->volume, afs_volume_trace_put_put_op);
key_put(op->key);
kfree(op);
return ret;
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index daaf3810cc92..580de4adaaf6 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -15,6 +15,42 @@
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
+struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
+ enum afs_estate_trace where)
+{ /* Take a ref on @estate if it is non-NULL, trace it with reason @where, and return @estate. */
+ if (estate) {
+ int r; /* filled in by __refcount_inc() and reported in the tracepoint */
+
+ __refcount_inc(&estate->ref, &r);
+ trace_afs_estate(estate->server_id, estate->probe_seq, r, where);
+ }
+ return estate; /* the pointer passed in, so NULL in gives NULL out */
+}
+
+static void afs_endpoint_state_rcu(struct rcu_head *rcu)
+{ /* RCU callback queued by afs_put_endpoint_state(): final teardown of an endpoint state. */
+ struct afs_endpoint_state *estate = container_of(rcu, struct afs_endpoint_state, rcu);
+
+ trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+ afs_estate_trace_free);
+ afs_put_addrlist(estate->addresses, afs_alist_trace_put_estate); /* drop the address list ref taken in afs_fs_probe_fileserver() */
+ kfree(estate); /* allocated with kzalloc() in afs_fs_probe_fileserver() */
+}
+
+void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where)
+{ /* Drop a ref on @estate (NULL is a no-op); when the count hits zero the free is deferred via RCU. */
+ if (estate) {
+ unsigned int server_id = estate->server_id, probe_seq = estate->probe_seq; /* copied first so the trace below does not read *estate after the put */
+ bool dead;
+ int r; /* filled in by __refcount_dec_and_test() and reported in the tracepoint */
+
+ dead = __refcount_dec_and_test(&estate->ref, &r);
+ trace_afs_estate(server_id, probe_seq, r, where);
+ if (dead)
+ call_rcu(&estate->rcu, afs_endpoint_state_rcu); /* afs_endpoint_state_rcu() does the actual freeing */
+ }
+}
+
/*
* Start the probe polling timer. We have to supply it with an inc on the
* outstanding server count.
@@ -38,9 +74,10 @@ static void afs_schedule_fs_probe(struct afs_net *net,
/*
* Handle the completion of a set of probes.
*/
-static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
+static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server,
+ struct afs_endpoint_state *estate)
{
- bool responded = server->probe.responded;
+ bool responded = test_bit(AFS_ESTATE_RESPONDED, &estate->flags);
write_seqlock(&net->fs_lock);
if (responded) {
@@ -50,6 +87,7 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server
clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
list_add_tail(&server->probe_link, &net->fs_probe_fast);
}
+
write_sequnlock(&net->fs_lock);
afs_schedule_fs_probe(net, server, !responded);
@@ -58,12 +96,13 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server
/*
* Handle the completion of a probe.
*/
-static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
+static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server,
+ struct afs_endpoint_state *estate) /* estate: the probe set this probe belongs to */
{
_enter("");
- if (atomic_dec_and_test(&server->probe_outstanding))
- afs_finished_fs_probe(net, server);
+ if (atomic_dec_and_test(&estate->nr_probing)) /* outstanding count is now kept per endpoint state */
+ afs_finished_fs_probe(net, server, estate); /* that was the last outstanding probe of the set */
wake_up_all(&server->probe_wq);
}
@@ -74,24 +113,22 @@ static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server
*/
static void afs_fs_probe_not_done(struct afs_net *net,
struct afs_server *server,
- struct afs_addr_cursor *ac)
+ struct afs_endpoint_state *estate,
+ int index) /* index: address slot whose probe call could not be made */
{
- struct afs_addr_list *alist = ac->alist;
- unsigned int index = ac->index;
-
_enter("");
trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
spin_lock(&server->probe_lock);
- server->probe.local_failure = true;
- if (server->probe.error == 0)
- server->probe.error = -ENOMEM;
+ set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags); /* replaces server->probe.local_failure */
+ if (estate->error == 0)
+ estate->error = -ENOMEM; /* only recorded if no error has been stored yet */
- set_bit(index, &alist->failed);
+ set_bit(index, &estate->failed_set); /* mark this address slot as failed in the probe state */
spin_unlock(&server->probe_lock);
- return afs_done_one_fs_probe(net, server);
+ return afs_done_one_fs_probe(net, server, estate); /* the unissued probe still counts against nr_probing */
}
/*
@@ -100,30 +137,34 @@ static void afs_fs_probe_not_done(struct afs_net *net,
*/
void afs_fileserver_probe_result(struct afs_call *call)
{
- struct afs_addr_list *alist = call->alist;
+ struct afs_endpoint_state *estate = call->probe;
+ struct afs_addr_list *alist = estate->addresses;
+ struct afs_address *addr = &alist->addrs[call->probe_index];
struct afs_server *server = call->server;
- unsigned int index = call->addr_ix;
- unsigned int rtt_us = 0, cap0;
+ unsigned int index = call->probe_index;
+ unsigned int rtt_us = -1, cap0;
int ret = call->error;
_enter("%pU,%u", &server->uuid, index);
+ WRITE_ONCE(addr->last_error, ret);
+
spin_lock(&server->probe_lock);
switch (ret) {
case 0:
- server->probe.error = 0;
+ estate->error = 0;
goto responded;
case -ECONNABORTED:
- if (!server->probe.responded) {
- server->probe.abort_code = call->abort_code;
- server->probe.error = ret;
+ if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) {
+ estate->abort_code = call->abort_code;
+ estate->error = ret;
}
goto responded;
case -ENOMEM:
case -ENONET:
- clear_bit(index, &alist->responded);
- server->probe.local_failure = true;
+ clear_bit(index, &estate->responsive_set);
+ set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
@@ -136,29 +177,29 @@ void afs_fileserver_probe_result(struct afs_call *call)
case -ETIMEDOUT:
case -ETIME:
default:
- clear_bit(index, &alist->responded);
- set_bit(index, &alist->failed);
- if (!server->probe.responded &&
- (server->probe.error == 0 ||
- server->probe.error == -ETIMEDOUT ||
- server->probe.error == -ETIME))
- server->probe.error = ret;
+ clear_bit(index, &estate->responsive_set);
+ set_bit(index, &estate->failed_set);
+ if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags) &&
+ (estate->error == 0 ||
+ estate->error == -ETIMEDOUT ||
+ estate->error == -ETIME))
+ estate->error = ret;
trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
goto out;
}
responded:
- clear_bit(index, &alist->failed);
+ clear_bit(index, &estate->failed_set);
if (call->service_id == YFS_FS_SERVICE) {
- server->probe.is_yfs = true;
+ set_bit(AFS_ESTATE_IS_YFS, &estate->flags);
set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
} else {
- server->probe.not_yfs = true;
- if (!server->probe.is_yfs) {
+ set_bit(AFS_ESTATE_NOT_YFS, &estate->flags);
+ if (!test_bit(AFS_ESTATE_IS_YFS, &estate->flags)) {
clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
}
cap0 = ntohl(call->tmp);
if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
@@ -167,116 +208,136 @@ responded:
clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
}
- rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
- if (rtt_us < server->probe.rtt) {
- server->probe.rtt = rtt_us;
+ rtt_us = rxrpc_kernel_get_srtt(addr->peer);
+ if (rtt_us < estate->rtt) {
+ estate->rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
}
smp_wmb(); /* Set rtt before responded. */
- server->probe.responded = true;
- set_bit(index, &alist->responded);
+ set_bit(AFS_ESTATE_RESPONDED, &estate->flags);
+ set_bit(index, &estate->responsive_set);
set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
spin_unlock(&server->probe_lock);
- _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
- &server->uuid, index, &alist->addrs[index].transport,
+ trace_afs_fs_probe(server, false, estate, index, call->error, call->abort_code, rtt_us);
+ _debug("probe[%x] %pU [%u] %pISpc rtt=%d ret=%d",
+ estate->probe_seq, &server->uuid, index,
+ rxrpc_kernel_remote_addr(alist->addrs[index].peer),
rtt_us, ret);
- return afs_done_one_fs_probe(call->net, server);
+ return afs_done_one_fs_probe(call->net, server, estate);
}
/*
- * Probe one or all of a fileserver's addresses to find out the best route and
- * to query its capabilities.
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
*/
void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
- struct key *key, bool all)
+ struct afs_addr_list *new_alist, struct key *key) /* new_alist: list to adopt, or NULL to keep the old state's list */
{
- struct afs_addr_cursor ac = {
- .index = 0,
- };
+ struct afs_endpoint_state *estate, *old;
+ struct afs_addr_list *alist;
+ unsigned long unprobed; /* bitmask of address slots still to be probed */
_enter("%pU", &server->uuid);
- read_lock(&server->fs_lock);
- ac.alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->fs_lock));
- afs_get_addrlist(ac.alist);
- read_unlock(&server->fs_lock);
+ estate = kzalloc(sizeof(*estate), GFP_KERNEL);
+ if (!estate)
+ return; /* no state could be allocated, so no probes are issued */
+
+ refcount_set(&estate->ref, 1);
+ estate->server_id = server->debug_id;
+ estate->rtt = UINT_MAX; /* so that any measured RTT compares lower */
+
+ write_lock(&server->fs_lock);
+
+ old = rcu_dereference_protected(server->endpoint_state,
+ lockdep_is_held(&server->fs_lock));
+ estate->responsive_set = old->responsive_set; /* carry over which addresses had responded */
+ estate->addresses = afs_get_addrlist(new_alist ?: old->addresses,
+ afs_alist_trace_get_estate);
+ alist = estate->addresses;
+ estate->probe_seq = ++server->probe_counter;
+ atomic_set(&estate->nr_probing, alist->nr_addrs); /* one outstanding probe per address */
+
+ rcu_assign_pointer(server->endpoint_state, estate);
+ set_bit(AFS_ESTATE_SUPERSEDED, &old->flags); /* lets waiters on the old state notice the switch */
+ write_unlock(&server->fs_lock);
+
+ trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+ afs_estate_trace_alloc_probe);
+
+ afs_get_address_preferences(net, alist);
server->probed_at = jiffies;
- atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
- memset(&server->probe, 0, sizeof(server->probe));
- server->probe.rtt = UINT_MAX;
-
- ac.index = ac.alist->preferred;
- if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
- all = true;
-
- if (all) {
- for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
- if (!afs_fs_get_capabilities(net, server, &ac, key))
- afs_fs_probe_not_done(net, server, &ac);
- } else {
- if (!afs_fs_get_capabilities(net, server, &ac, key))
- afs_fs_probe_not_done(net, server, &ac);
+ unprobed = alist->nr_addrs < BITS_PER_LONG ? (1UL << alist->nr_addrs) - 1 : ~0UL;
+ while (unprobed) { /* mask above avoids a full-width shift (UB); each pass probes the highest-prio slot left */
+ unsigned int index = 0, i;
+ int best_prio = -1;
+
+ for (i = 0; i < alist->nr_addrs; i++) {
+ if (test_bit(i, &unprobed) &&
+ alist->addrs[i].prio > best_prio) {
+ index = i;
+ best_prio = alist->addrs[i].prio;
+ }
+ }
+ __clear_bit(index, &unprobed);
+
+ trace_afs_fs_probe(server, true, estate, index, 0, 0, 0);
+ if (!afs_fs_get_capabilities(net, server, estate, index, key))
+ afs_fs_probe_not_done(net, server, estate, index); /* call not made: account for it as a failed probe */
}
- afs_put_addrlist(ac.alist);
+ afs_put_endpoint_state(old, afs_estate_trace_put_probe); /* presumably the ref that server->endpoint_state held on the old state */
}
/*
- * Wait for the first as-yet untried fileserver to respond.
+ * Wait for the first as-yet untried fileserver to respond, for the probe state
+ * to be superseded or for all probes to finish.
*/
-int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr)
{
- struct wait_queue_entry *waits;
- struct afs_server *server;
- unsigned int rtt = UINT_MAX, rtt_s;
- bool have_responders = false;
- int pref = -1, i;
+ struct afs_endpoint_state *estate;
+ struct afs_server_list *slist = op->server_list;
+ bool still_probing = true;
+ int ret = 0, i;
- _enter("%u,%lx", slist->nr_servers, untried);
+ _enter("%u", slist->nr_servers);
- /* Only wait for servers that have a probe outstanding. */
for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- if (!atomic_read(&server->probe_outstanding))
- __clear_bit(i, &untried);
- if (server->probe.responded)
- have_responders = true;
- }
+ estate = states[i].endpoint_state;
+ if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
+ return 2;
+ if (atomic_read(&estate->nr_probing))
+ still_probing = true;
+ if (estate->responsive_set & states[i].untried_addrs)
+ return 1;
}
- if (have_responders || !untried)
+ if (!still_probing)
return 0;
- waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
- if (!waits)
- return -ENOMEM;
-
- for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- init_waitqueue_entry(&waits[i], current);
- add_wait_queue(&server->probe_wq, &waits[i]);
- }
- }
+ for (i = 0; i < slist->nr_servers; i++)
+ add_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);
for (;;) {
- bool still_probing = false;
+ still_probing = false;
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- if (server->probe.responded)
- goto stop;
- if (atomic_read(&server->probe_outstanding))
- still_probing = true;
+ estate = states[i].endpoint_state;
+ if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) {
+ ret = 2;
+ goto stop;
+ }
+ if (atomic_read(&estate->nr_probing))
+ still_probing = true;
+ if (estate->responsive_set & states[i].untried_addrs) {
+ ret = 1;
+ goto stop;
}
}
@@ -288,28 +349,12 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
stop:
set_current_state(TASK_RUNNING);
- for (i = 0; i < slist->nr_servers; i++) {
- if (test_bit(i, &untried)) {
- server = slist->servers[i].server;
- rtt_s = READ_ONCE(server->rtt);
- if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
- rtt_s < rtt) {
- pref = i;
- rtt = rtt_s;
- }
-
- remove_wait_queue(&server->probe_wq, &waits[i]);
- }
- }
-
- kfree(waits);
-
- if (pref == -1 && signal_pending(current))
- return -ERESTARTSYS;
+ for (i = 0; i < slist->nr_servers; i++)
+ remove_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);
- if (pref >= 0)
- slist->preferred = pref;
- return 0;
+ if (!ret && signal_pending(current))
+ ret = -ERESTARTSYS;
+ return ret;
}
/*
@@ -327,7 +372,7 @@ void afs_fs_probe_timer(struct timer_list *timer)
/*
* Dispatch a probe to a server.
*/
-static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
+static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server)
__releases(&net->fs_lock)
{
struct key *key = NULL;
@@ -340,7 +385,7 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server
afs_get_server(server, afs_server_trace_get_probe);
write_sequnlock(&net->fs_lock);
- afs_fs_probe_fileserver(net, server, key, all);
+ afs_fs_probe_fileserver(net, server, NULL, key);
afs_put_server(net, server, afs_server_trace_put_probe);
}
@@ -352,7 +397,7 @@ void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
{
write_seqlock(&net->fs_lock);
if (!list_empty(&server->probe_link))
- return afs_dispatch_fs_probe(net, server, true);
+ return afs_dispatch_fs_probe(net, server);
write_sequnlock(&net->fs_lock);
}
@@ -412,7 +457,7 @@ again:
_debug("probe %pU", &server->uuid);
if (server && (first_pass || !need_resched())) {
- afs_dispatch_fs_probe(net, server, server == fast);
+ afs_dispatch_fs_probe(net, server);
first_pass = false;
goto again;
}
@@ -436,12 +481,13 @@ again:
/*
* Wait for a probe on a particular fileserver to complete for 2s.
*/
-int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
+int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
+ unsigned long exclude, bool is_intr)
{
struct wait_queue_entry wait;
unsigned long timo = 2 * HZ;
- if (atomic_read(&server->probe_outstanding) == 0)
+ if (atomic_read(&estate->nr_probing) == 0)
goto dont_wait;
init_wait_entry(&wait, 0);
@@ -449,8 +495,9 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
prepare_to_wait_event(&server->probe_wq, &wait,
is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
if (timo == 0 ||
- server->probe.responded ||
- atomic_read(&server->probe_outstanding) == 0 ||
+ test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags) ||
+ (estate->responsive_set & ~exclude) ||
+ atomic_read(&estate->nr_probing) == 0 ||
(is_intr && signal_pending(current)))
break;
timo = schedule_timeout(timo);
@@ -459,7 +506,9 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
finish_wait(&server->probe_wq, &wait);
dont_wait:
- if (server->probe.responded)
+ if (estate->responsive_set & ~exclude)
+ return 1;
+ if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
return 0;
if (is_intr && signal_pending(current))
return -ERESTARTSYS;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 7d37f63ef0f0..79cd30775b7a 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -290,6 +290,7 @@ void afs_fs_fetch_status(struct afs_operation *op)
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -442,6 +443,7 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
bp[6] = 0;
bp[7] = htonl(lower_32_bits(req->len));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -476,6 +478,7 @@ void afs_fs_fetch_data(struct afs_operation *op)
bp[4] = htonl(lower_32_bits(req->pos));
bp[5] = htonl(lower_32_bits(req->len));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -559,6 +562,7 @@ void afs_fs_create_file(struct afs_operation *op)
*bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -612,6 +616,7 @@ void afs_fs_make_dir(struct afs_operation *op)
*bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -685,6 +690,7 @@ void afs_fs_remove_file(struct afs_operation *op)
bp = (void *) bp + padsz;
}
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -732,6 +738,7 @@ void afs_fs_remove_dir(struct afs_operation *op)
bp = (void *) bp + padsz;
}
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -812,6 +819,7 @@ void afs_fs_link(struct afs_operation *op)
*bp++ = htonl(vp->fid.vnode);
*bp++ = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call1(call, &vp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -907,6 +915,7 @@ void afs_fs_symlink(struct afs_operation *op)
*bp++ = htonl(S_IRWXUGO); /* unix mode */
*bp++ = 0; /* segment size */
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1003,6 +1012,7 @@ void afs_fs_rename(struct afs_operation *op)
bp = (void *) bp + n_padsz;
}
+ call->fid = orig_dvp->fid;
trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1090,6 +1100,7 @@ static void afs_fs_store_data64(struct afs_operation *op)
*bp++ = htonl(upper_32_bits(op->store.i_size));
*bp++ = htonl(lower_32_bits(op->store.i_size));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1140,6 +1151,7 @@ void afs_fs_store_data(struct afs_operation *op)
*bp++ = htonl(lower_32_bits(op->store.size));
*bp++ = htonl(lower_32_bits(op->store.i_size));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1206,6 +1218,7 @@ static void afs_fs_setattr_size64(struct afs_operation *op)
*bp++ = htonl(upper_32_bits(attr->ia_size)); /* new file length */
*bp++ = htonl(lower_32_bits(attr->ia_size));
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1247,6 +1260,7 @@ static void afs_fs_setattr_size(struct afs_operation *op)
*bp++ = 0; /* size of write */
*bp++ = htonl(attr->ia_size); /* new file length */
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1283,6 +1297,7 @@ void afs_fs_setattr(struct afs_operation *op)
xdr_encode_AFS_StoreStatus(&bp, op->setattr.attr);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1446,6 +1461,7 @@ void afs_fs_get_volume_status(struct afs_operation *op)
bp[0] = htonl(FSGETVOLUMESTATUS);
bp[1] = htonl(vp->fid.vid);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1528,6 +1544,7 @@ void afs_fs_set_lock(struct afs_operation *op)
*bp++ = htonl(vp->fid.unique);
*bp++ = htonl(op->lock.type);
+ call->fid = vp->fid;
trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1554,6 +1571,7 @@ void afs_fs_extend_lock(struct afs_operation *op)
*bp++ = htonl(vp->fid.vnode);
*bp++ = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1580,6 +1598,7 @@ void afs_fs_release_lock(struct afs_operation *op)
*bp++ = htonl(vp->fid.vnode);
*bp++ = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1605,13 +1624,12 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
/*
* Flush all the callbacks we have on a server.
*/
-int afs_fs_give_up_all_callbacks(struct afs_net *net,
- struct afs_server *server,
- struct afs_addr_cursor *ac,
- struct key *key)
+int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
+ struct afs_address *addr, struct key *key)
{
struct afs_call *call;
__be32 *bp;
+ int ret;
_enter("");
@@ -1619,15 +1637,22 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
if (!call)
return -ENOMEM;
- call->key = key;
+ call->key = key;
+ call->peer = rxrpc_kernel_get_peer(addr->peer);
+ call->service_id = server->service_id;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(FSGIVEUPALLCALLBACKS);
call->server = afs_use_server(server, afs_server_trace_give_up_cb);
- afs_make_call(ac, call, GFP_NOFS);
- return afs_wait_for_call_to_complete(call, ac);
+ afs_make_call(call, GFP_NOFS);
+ afs_wait_for_call_to_complete(call);
+ ret = call->error;
+ if (call->responded)
+ set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+ afs_put_call(call);
+ return ret;
}
/*
@@ -1689,6 +1714,12 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
return 0;
}
+static void afs_fs_get_capabilities_destructor(struct afs_call *call)
+{
+ afs_put_endpoint_state(call->probe, afs_estate_trace_put_getcaps);
+ afs_flat_call_destructor(call);
+}
+
/*
* FS.GetCapabilities operation type
*/
@@ -1697,7 +1728,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
.done = afs_fileserver_probe_result,
- .destructor = afs_flat_call_destructor,
+ .destructor = afs_fs_get_capabilities_destructor,
};
/*
@@ -1707,7 +1738,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
* ->done() - otherwise we return false to indicate we didn't even try.
*/
bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
- struct afs_addr_cursor *ac, struct key *key)
+ struct afs_endpoint_state *estate, unsigned int addr_index,
+ struct key *key)
{
struct afs_call *call;
__be32 *bp;
@@ -1718,10 +1750,14 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
if (!call)
return false;
- call->key = key;
- call->server = afs_use_server(server, afs_server_trace_get_caps);
- call->upgrade = true;
- call->async = true;
+ call->key = key;
+ call->server = afs_use_server(server, afs_server_trace_get_caps);
+ call->peer = rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer);
+ call->probe = afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps);
+ call->probe_index = addr_index;
+ call->service_id = server->service_id;
+ call->upgrade = true;
+ call->async = true;
call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
/* marshall the parameters */
@@ -1729,7 +1765,7 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
*bp++ = htonl(FSGETCAPABILITIES);
trace_afs_make_fs_call(call, NULL);
- afs_make_call(ac, call, GFP_NOFS);
+ afs_make_call(call, GFP_NOFS);
afs_put_call(call);
return true;
}
@@ -1853,7 +1889,10 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_AFSVolSync(&bp, &op->volsync);
+ /* Unfortunately, prior to OpenAFS-1.6, volsync here is filled
+ * with rubbish.
+ */
+ xdr_decode_AFSVolSync(&bp, NULL);
call->unmarshall++;
fallthrough;
@@ -1899,7 +1938,7 @@ void afs_fs_inline_bulk_status(struct afs_operation *op)
int i;
if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) {
- op->error = -ENOTSUPP;
+ afs_op_set_error(op, -ENOTSUPP);
return;
}
@@ -1928,6 +1967,7 @@ void afs_fs_inline_bulk_status(struct afs_operation *op)
*bp++ = htonl(op->more_files[i].fid.unique);
}
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -2033,6 +2073,7 @@ void afs_fs_fetch_acl(struct afs_operation *op)
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
@@ -2078,6 +2119,7 @@ void afs_fs_store_acl(struct afs_operation *op)
if (acl->size != size)
memset((void *)&bp[5] + acl->size, 0, size - acl->size);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 78efc9719349..4f04f6f33f46 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -85,8 +85,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
write_seqlock(&vnode->cb_lock);
- vnode->cb_v_break = op->cb_v_break;
- vnode->cb_s_break = op->cb_s_break;
+ vnode->cb_v_check = op->cb_v_break;
vnode->status = *status;
t = status->mtime_client;
@@ -146,11 +145,10 @@ static int afs_inode_init_from_status(struct afs_operation *op,
if (!vp->scb.have_cb) {
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
- vnode->cb_expires_at = ktime_get_real_seconds();
+ atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
} else {
- vnode->cb_expires_at = vp->scb.callback.expires_at;
vnode->cb_server = op->server;
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ atomic64_set(&vnode->cb_expires_at, vp->scb.callback.expires_at);
}
write_sequnlock(&vnode->cb_lock);
@@ -214,7 +212,8 @@ static void afs_apply_status(struct afs_operation *op,
vnode->status = *status;
if (vp->dv_before + vp->dv_delta != status->data_version) {
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
+ atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
vnode->fid.vid, vnode->fid.vnode,
(unsigned long long)vp->dv_before + vp->dv_delta,
@@ -268,9 +267,9 @@ static void afs_apply_callback(struct afs_operation *op,
struct afs_vnode *vnode = vp->vnode;
if (!afs_cb_is_broken(vp->cb_break_before, vnode)) {
- vnode->cb_expires_at = cb->expires_at;
- vnode->cb_server = op->server;
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ if (op->volume->type == AFSVL_RWVOL)
+ vnode->cb_server = op->server;
+ atomic64_set(&vnode->cb_expires_at, cb->expires_at);
}
}
@@ -331,7 +330,7 @@ static void afs_fetch_status_success(struct afs_operation *op)
if (vnode->netfs.inode.i_state & I_NEW) {
ret = afs_inode_init_from_status(op, vp, vnode);
- op->error = ret;
+ afs_op_set_error(op, ret);
if (ret == 0)
afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
} else {
@@ -542,7 +541,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key)
BUG_ON(!(inode->i_state & I_NEW));
vnode = AFS_FS_I(inode);
- vnode->cb_v_break = as->volume->cb_v_break,
+ vnode->cb_v_check = atomic_read(&as->volume->cb_v_break),
afs_set_netfs_context(vnode);
op = afs_alloc_operation(key, as->volume);
@@ -573,180 +572,6 @@ error:
}
/*
- * mark the data attached to an inode as obsolete due to a write on the server
- * - might also want to ditch all the outstanding writes and dirty pages
- */
-static void afs_zap_data(struct afs_vnode *vnode)
-{
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
- afs_invalidate_cache(vnode, 0);
-
- /* nuke all the non-dirty pages that aren't locked, mapped or being
- * written back in a regular file and completely discard the pages in a
- * directory or symlink */
- if (S_ISREG(vnode->netfs.inode.i_mode))
- invalidate_remote_inode(&vnode->netfs.inode);
- else
- invalidate_inode_pages2(vnode->netfs.inode.i_mapping);
-}
-
-/*
- * Check to see if we have a server currently serving this volume and that it
- * hasn't been reinitialised or dropped from the list.
- */
-static bool afs_check_server_good(struct afs_vnode *vnode)
-{
- struct afs_server_list *slist;
- struct afs_server *server;
- bool good;
- int i;
-
- if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break))
- return true;
-
- rcu_read_lock();
-
- slist = rcu_dereference(vnode->volume->servers);
- for (i = 0; i < slist->nr_servers; i++) {
- server = slist->servers[i].server;
- if (server == vnode->cb_server) {
- good = (vnode->cb_s_break == server->cb_s_break);
- rcu_read_unlock();
- return good;
- }
- }
-
- rcu_read_unlock();
- return false;
-}
-
-/*
- * Check the validity of a vnode/inode.
- */
-bool afs_check_validity(struct afs_vnode *vnode)
-{
- enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
- time64_t now = ktime_get_real_seconds();
- unsigned int cb_break;
- int seq = 0;
-
- do {
- read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- cb_break = vnode->cb_break;
-
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_v_break != vnode->volume->cb_v_break)
- need_clear = afs_cb_break_for_v_break;
- else if (!afs_check_server_good(vnode))
- need_clear = afs_cb_break_for_s_reinit;
- else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
- need_clear = afs_cb_break_for_zap;
- else if (vnode->cb_expires_at - 10 <= now)
- need_clear = afs_cb_break_for_lapsed;
- } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- ;
- } else {
- need_clear = afs_cb_break_no_promise;
- }
-
- } while (need_seqretry(&vnode->cb_lock, seq));
-
- done_seqretry(&vnode->cb_lock, seq);
-
- if (need_clear == afs_cb_break_no_break)
- return true;
-
- write_seqlock(&vnode->cb_lock);
- if (need_clear == afs_cb_break_no_promise)
- vnode->cb_v_break = vnode->volume->cb_v_break;
- else if (cb_break == vnode->cb_break)
- __afs_break_callback(vnode, need_clear);
- else
- trace_afs_cb_miss(&vnode->fid, need_clear);
- write_sequnlock(&vnode->cb_lock);
- return false;
-}
-
-/*
- * Returns true if the pagecache is still valid. Does not sleep.
- */
-bool afs_pagecache_valid(struct afs_vnode *vnode)
-{
- if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) {
- if (vnode->netfs.inode.i_nlink)
- clear_nlink(&vnode->netfs.inode);
- return true;
- }
-
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
- afs_check_validity(vnode))
- return true;
-
- return false;
-}
-
-/*
- * validate a vnode/inode
- * - there are several things we need to check
- * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
- * symlink)
- * - parent dir metadata changed (security changes)
- * - dentry data changed (write, truncate)
- * - dentry metadata changed (security changes)
- */
-int afs_validate(struct afs_vnode *vnode, struct key *key)
-{
- int ret;
-
- _enter("{v={%llx:%llu} fl=%lx},%x",
- vnode->fid.vid, vnode->fid.vnode, vnode->flags,
- key_serial(key));
-
- if (afs_pagecache_valid(vnode))
- goto valid;
-
- down_write(&vnode->validate_lock);
-
- /* if the promise has expired, we need to check the server again to get
- * a new promise - note that if the (parent) directory's metadata was
- * changed then the security may be different and we may no longer have
- * access */
- if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- _debug("not promised");
- ret = afs_fetch_status(vnode, key, false, NULL);
- if (ret < 0) {
- if (ret == -ENOENT) {
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- ret = -ESTALE;
- }
- goto error_unlock;
- }
- _debug("new promise [fl=%lx]", vnode->flags);
- }
-
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- _debug("file already deleted");
- ret = -ESTALE;
- goto error_unlock;
- }
-
- /* if the vnode's data version number changed then its contents are
- * different */
- if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
- afs_zap_data(vnode);
- up_write(&vnode->validate_lock);
-valid:
- _leave(" = 0");
- return 0;
-
-error_unlock:
- up_write(&vnode->validate_lock);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* read the attributes of an inode
*/
int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
@@ -755,13 +580,13 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct inode *inode = d_inode(path->dentry);
struct afs_vnode *vnode = AFS_FS_I(inode);
struct key *key;
- int ret, seq = 0;
+ int ret, seq;
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
if (vnode->volume &&
!(query_flags & AT_STATX_DONT_SYNC) &&
- !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ atomic64_read(&vnode->cb_expires_at) == AFS_NO_CB_PROMISE) {
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key))
return PTR_ERR(key);
@@ -772,7 +597,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
}
do {
- read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ seq = read_seqbegin(&vnode->cb_lock);
generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
stat->nlink > 0)
@@ -784,9 +609,8 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
*/
if (S_ISDIR(inode->i_mode))
stat->size = vnode->netfs.remote_i_size;
- } while (need_seqretry(&vnode->cb_lock, seq));
+ } while (read_seqretry(&vnode->cb_lock, seq));
- done_seqretry(&vnode->cb_lock, seq);
return 0;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7385d62c8cf5..e33ace259cc6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -33,6 +33,7 @@
struct pagevec;
struct afs_call;
struct afs_vnode;
+struct afs_server_probe;
/*
* Partial file-locking emulation mode. (The problem being that AFS3 only
@@ -73,21 +74,51 @@ enum afs_call_state {
};
/*
+ * Address preferences.
+ */
+struct afs_addr_preference {
+ union {
+ struct in_addr ipv4_addr; /* AF_INET address to compare against */
+ struct in6_addr ipv6_addr; /* AF_INET6 address to compare against */
+ };
+ sa_family_t family; /* Which address to use */
+ u16 prio; /* Priority */
+ u8 subnet_mask; /* How many bits to compare */
+};
+
+struct afs_addr_preference_list {
+ struct rcu_head rcu;
+ u16 version; /* Incremented when prefs list changes */
+ u8 ipv6_off; /* Offset of IPv6 addresses */
+ u8 nr; /* Number of addresses in total */
+ u8 max_prefs; /* Number of prefs allocated */
+ struct afs_addr_preference prefs[] __counted_by(max_prefs);
+};
+
+struct afs_address {
+ struct rxrpc_peer *peer;
+ short last_error; /* Last error from this address */
+ u16 prio; /* Address priority */
+};
+
+/*
* List of server addresses.
*/
struct afs_addr_list {
struct rcu_head rcu;
refcount_t usage;
u32 version; /* Version */
+ unsigned int debug_id;
+ unsigned int addr_pref_version; /* Version of address preference list */
unsigned char max_addrs;
unsigned char nr_addrs;
unsigned char preferred; /* Preferred address */
unsigned char nr_ipv4; /* Number of IPv4 addresses */
enum dns_record_source source:8;
enum dns_lookup_status status:8;
- unsigned long failed; /* Mask of addrs that failed locally/ICMP */
+ unsigned long probe_failed; /* Mask of addrs that failed locally/ICMP */
unsigned long responded; /* Mask of addrs that responded */
- struct sockaddr_rxrpc addrs[] __counted_by(max_addrs);
+ struct afs_address addrs[] __counted_by(max_addrs);
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};
@@ -96,11 +127,11 @@ struct afs_addr_list {
*/
struct afs_call {
const struct afs_call_type *type; /* type of call */
- struct afs_addr_list *alist; /* Address is alist[addr_ix] */
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */
+ struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
struct afs_server *server; /* The fileserver record if fs op (pins ref) */
@@ -116,11 +147,14 @@ struct afs_call {
};
void *buffer; /* reply receive buffer */
union {
- long ret0; /* Value to reply with instead of 0 */
+ struct afs_endpoint_state *probe;
+ struct afs_addr_list *vl_probe;
struct afs_addr_list *ret_alist;
struct afs_vldb_entry *ret_vldb;
char *ret_str;
};
+ struct afs_fid fid; /* Primary vnode ID (or all zeroes) */
+ unsigned char probe_index; /* Index of the address being probed (see ->probe) */
struct afs_operation *op;
unsigned int server_index;
refcount_t ref;
@@ -133,13 +167,13 @@ struct afs_call {
unsigned reply_max; /* maximum size of reply */
unsigned count2; /* count used in unmarshalling */
unsigned char unmarshall; /* unmarshalling phase */
- unsigned char addr_ix; /* Address in ->alist */
bool drop_ref; /* T if need to drop ref for incoming call */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
+ bool responded; /* Got a response from the call (may be abort) */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
@@ -306,6 +340,8 @@ struct afs_net {
struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */
struct afs_sysnames *sysnames;
rwlock_t sysnames_lock;
+ struct afs_addr_preference_list __rcu *address_prefs;
+ u16 address_pref_version;
/* Statistics counters */
atomic_t n_lookup; /* Number of lookups done */
@@ -379,6 +415,7 @@ struct afs_cell {
unsigned int debug_id;
/* The volumes belonging to this cell */
+ struct rw_semaphore vs_lock; /* Lock for server->volumes */
struct rb_root volumes; /* Tree of volumes on this server */
struct hlist_head proc_volumes; /* procfs volume list */
seqlock_t volume_lock; /* For volumes */
@@ -386,9 +423,6 @@ struct afs_cell {
/* Active fileserver interaction state. */
struct rb_root fs_servers; /* afs_server (by server UUID) */
seqlock_t fs_lock; /* For fs_servers */
- struct rw_semaphore fs_open_mmaps_lock;
- struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */
- atomic_t fs_s_break; /* Counter of CB.InitCallBackState messages */
/* VL server list. */
rwlock_t vl_servers_lock; /* Lock on vl_servers */
@@ -412,13 +446,14 @@ struct afs_vlserver {
rwlock_t lock; /* Lock on addresses */
refcount_t ref;
unsigned int rtt; /* Server's current RTT in uS */
+ unsigned int debug_id;
/* Probe state */
wait_queue_head_t probe_wq;
atomic_t probe_outstanding;
spinlock_t probe_lock;
struct {
- unsigned int rtt; /* RTT in uS */
+ unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */
u32 abort_code;
short error;
unsigned short flags;
@@ -428,6 +463,7 @@ struct afs_vlserver {
#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */
} probe;
+ u16 service_id; /* Service ID we're using */
u16 port;
u16 name_len; /* Length of name */
char name[]; /* Server name, case-flattened */
@@ -477,6 +513,7 @@ struct afs_vldb_entry {
#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
+ u8 vlsf_flags[AFS_NMAXNSERVERS];
short error;
u8 nr_servers; /* Number of server records */
u8 name_len;
@@ -484,6 +521,32 @@ struct afs_vldb_entry {
};
/*
+ * Fileserver endpoint state. This records the addresses of a fileserver's
+ * endpoints and the state and result of a round of probing on them. This
+ * allows the rotation algorithm to access those results without them being
+ * erased by a subsequent round of probing.
+ */
+struct afs_endpoint_state {
+ struct rcu_head rcu;
+ struct afs_addr_list *addresses; /* The addresses being probed */
+ unsigned long responsive_set; /* Bitset of responsive endpoints */
+ unsigned long failed_set; /* Bitset of endpoints we failed to probe */
+ refcount_t ref;
+ unsigned int server_id; /* Debug ID of server */
+ unsigned int probe_seq; /* Probe sequence (from server::probe_counter) */
+ atomic_t nr_probing; /* Number of outstanding probes */
+ unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */
+ s32 abort_code;
+ short error;
+ unsigned long flags;
+#define AFS_ESTATE_RESPONDED 0 /* Set if the server responded */
+#define AFS_ESTATE_SUPERSEDED 1 /* Set if this record has been superseded */
+#define AFS_ESTATE_IS_YFS 2 /* Set if probe upgraded to YFS */
+#define AFS_ESTATE_NOT_YFS 3 /* Set if probe didn't upgrade to YFS */
+#define AFS_ESTATE_LOCAL_FAILURE 4 /* Set if there was a local failure (eg. ENOMEM) */
+};
+
+/*
* Record of fileserver with which we're actively communicating.
*/
struct afs_server {
@@ -493,7 +556,6 @@ struct afs_server {
struct afs_uuid _uuid;
};
- struct afs_addr_list __rcu *addresses;
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
struct rb_node uuid_rb; /* Link in net->fs_servers */
struct afs_server __rcu *uuid_next; /* Next server with same UUID */
@@ -502,7 +564,7 @@ struct afs_server {
struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
struct hlist_node proc_link; /* Link in net->fs_proc */
- struct work_struct initcb_work; /* Work for CB.InitCallBackState* */
+ struct list_head volumes; /* RCU list of afs_server_entry objects */
struct afs_server *gc_next; /* Next server in manager's list */
time64_t unuse_time; /* Time at which last unused */
unsigned long flags;
@@ -520,44 +582,47 @@ struct afs_server {
refcount_t ref; /* Object refcount */
atomic_t active; /* Active user count */
u32 addr_version; /* Address list version */
+ u16 service_id; /* Service ID we're using. */
unsigned int rtt; /* Server's current RTT in uS */
unsigned int debug_id; /* Debugging ID for traces */
/* file service access */
rwlock_t fs_lock; /* access lock */
- /* callback promise management */
- unsigned cb_s_break; /* Break-everything counter. */
-
/* Probe state */
+ struct afs_endpoint_state __rcu *endpoint_state; /* Latest endpoint/probe state */
unsigned long probed_at; /* Time last probe was dispatched (jiffies) */
wait_queue_head_t probe_wq;
- atomic_t probe_outstanding;
+ unsigned int probe_counter; /* Number of probes issued */
spinlock_t probe_lock;
- struct {
- unsigned int rtt; /* RTT in uS */
- u32 abort_code;
- short error;
- bool responded:1;
- bool is_yfs:1;
- bool not_yfs:1;
- bool local_failure:1;
- } probe;
};
+enum afs_ro_replicating {
+ AFS_RO_NOT_REPLICATING, /* Not doing replication */
+ AFS_RO_REPLICATING_USE_OLD, /* Replicating; use old version */
+ AFS_RO_REPLICATING_USE_NEW, /* Replicating; switch to new version */
+} __mode(byte);
+
/*
* Replaceable volume server list.
*/
struct afs_server_entry {
struct afs_server *server;
+ struct afs_volume *volume;
+ struct list_head slink; /* Link in server->volumes */
+ time64_t cb_expires_at; /* Time at which volume-level callback expires */
+ unsigned long flags;
+#define AFS_SE_EXCLUDED 0 /* Set if server is to be excluded in rotation */
+#define AFS_SE_VOLUME_OFFLINE 1 /* Set if volume offline notice given */
+#define AFS_SE_VOLUME_BUSY 2 /* Set if volume busy notice given */
};
struct afs_server_list {
struct rcu_head rcu;
- afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */
refcount_t usage;
+ bool attached; /* T if attached to servers */
+ enum afs_ro_replicating ro_replicating; /* RW->RO update (probably) in progress */
unsigned char nr_servers;
- unsigned char preferred; /* Preferred server */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
rwlock_t lock;
@@ -568,25 +633,23 @@ struct afs_server_list {
* Live AFS volume management.
*/
struct afs_volume {
- union {
- struct rcu_head rcu;
- afs_volid_t vid; /* volume ID */
- };
+ struct rcu_head rcu;
+ afs_volid_t vid; /* The volume ID of this volume */
+ afs_volid_t vids[AFS_MAXTYPES]; /* All associated volume IDs */
refcount_t ref;
time64_t update_at; /* Time at which to next update */
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
struct rb_node cell_node; /* Link in cell->volumes */
struct hlist_node proc_link; /* Link in cell->proc_volumes */
struct super_block __rcu *sb; /* Superblock on which inodes reside */
+ struct work_struct destructor; /* Deferred destructor */
unsigned long flags;
#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */
#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */
#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */
#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */
-#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
-#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
-#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
-#define AFS_VOLUME_RM_TREE 7 /* - Set if volume removed from cell->volumes */
+#define AFS_VOLUME_MAYBE_NO_IBULK 4 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE 5 /* - Set if volume removed from cell->volumes */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_volume *cache; /* Caching cookie */
#endif
@@ -594,8 +657,21 @@ struct afs_volume {
rwlock_t servers_lock; /* Lock for ->servers */
unsigned int servers_seq; /* Incremented each time ->servers changes */
- unsigned cb_v_break; /* Break-everything counter. */
+ /* RO release tracking */
+ struct mutex volsync_lock; /* Time/state evaluation lock */
+ time64_t creation_time; /* Volume creation time (or TIME64_MIN) */
+ time64_t update_time; /* Volume update time (or TIME64_MIN) */
+
+ /* Callback management */
+ struct mutex cb_check_lock; /* Lock to control race to check after v_break */
+ time64_t cb_expires_at; /* Earliest volume callback expiry time */
+ atomic_t cb_ro_snapshot; /* RO volume update-from-snapshot counter */
+ atomic_t cb_v_break; /* Volume-break event counter. */
+ atomic_t cb_v_check; /* Volume-break has-been-checked counter. */
+ atomic_t cb_scrub; /* Scrub-all-data event counter. */
rwlock_t cb_v_break_lock;
+ struct rw_semaphore open_mmaps_lock;
+ struct list_head open_mmaps; /* List of vnodes that are mmapped */
afs_voltype_t type; /* type of volume */
char type_force; /* force volume type (suppress R/O -> R/W) */
@@ -634,7 +710,6 @@ struct afs_vnode {
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
-#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
@@ -660,13 +735,14 @@ struct afs_vnode {
struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */
void *cb_server; /* Server with callback/filelock */
atomic_t cb_nr_mmap; /* Number of mmaps */
- unsigned int cb_fs_s_break; /* Mass server break counter (cell->fs_s_break) */
- unsigned int cb_s_break; /* Mass break counter on ->server */
- unsigned int cb_v_break; /* Mass break counter on ->volume */
+ unsigned int cb_ro_snapshot; /* RO volume release counter on ->volume */
+ unsigned int cb_scrub; /* Scrub counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
+ unsigned int cb_v_check; /* Break check counter on ->volume */
seqlock_t cb_lock; /* Lock for ->cb_server, ->status, ->cb_*break */
- time64_t cb_expires_at; /* time at which callback expires */
+ atomic64_t cb_expires_at; /* time at which callback expires */
+#define AFS_NO_CB_PROMISE TIME64_MIN
};
static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
@@ -714,40 +790,49 @@ struct afs_permits {
* Error prioritisation and accumulation.
*/
struct afs_error {
- short error; /* Accumulated error */
+ s32 abort_code; /* Cumulative abort code */
+ short error; /* Cumulative error */
bool responded; /* T if server responded */
-};
-
-/*
- * Cursor for iterating over a server's address list.
- */
-struct afs_addr_cursor {
- struct afs_addr_list *alist; /* Current address list (pins ref) */
- unsigned long tried; /* Tried addresses */
- signed char index; /* Current address */
- bool responded; /* T if the current address responded */
- unsigned short nr_iterations; /* Number of address iterations */
- short error;
- u32 abort_code;
+ bool aborted; /* T if ->error is from an abort */
};
/*
* Cursor for iterating over a set of volume location servers.
*/
struct afs_vl_cursor {
- struct afs_addr_cursor ac;
struct afs_cell *cell; /* The cell we're querying */
struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
struct afs_vlserver *server; /* Server on which this resides */
+ struct afs_addr_list *alist; /* Current address list (pins ref) */
struct key *key; /* Key for the server */
- unsigned long untried; /* Bitmask of untried servers */
- short index; /* Current server */
- short error;
+ unsigned long untried_servers; /* Bitmask of untried servers */
+ unsigned long addr_tried; /* Tried addresses */
+ struct afs_error cumul_error; /* Cumulative error */
+ unsigned int debug_id;
+ s32 call_abort_code;
+ short call_error; /* Error from single call */
+ short server_index; /* Current server */
+ signed char addr_index; /* Current address */
unsigned short flags;
#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */
#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */
- unsigned short nr_iterations; /* Number of server iterations */
+ short nr_iterations; /* Number of server iterations */
+ bool call_responded; /* T if the current address responded */
+};
+
+/*
+ * Fileserver state tracking for an operation. An array of these is kept,
+ * indexed by server index.
+ */
+struct afs_server_state {
+ /* Tracking of fileserver probe state. Other operations may interfere
+ * by probing a fileserver when accessing other volumes.
+ */
+ unsigned int probe_seq;
+ unsigned long untried_addrs; /* Addresses we haven't tried yet */
+ struct wait_queue_entry probe_waiter;
+ struct afs_endpoint_state *endpoint_state; /* Endpoint state being monitored */
};
/*
@@ -768,7 +853,7 @@ struct afs_vnode_param {
struct afs_fid fid; /* Fid to access */
struct afs_status_cb scb; /* Returned status and callback promise */
afs_dataversion_t dv_before; /* Data version before the call */
- unsigned int cb_break_before; /* cb_break + cb_s_break before the call */
+ unsigned int cb_break_before; /* cb_break before the call */
u8 dv_delta; /* Expected change in data version */
bool put_vnode:1; /* T if we have a ref on the vnode */
bool need_io_lock:1; /* T if we need the I/O lock on this */
@@ -793,17 +878,17 @@ struct afs_operation {
struct afs_volume *volume; /* Volume being accessed */
struct afs_vnode_param file[2];
struct afs_vnode_param *more_files;
- struct afs_volsync volsync;
+ struct afs_volsync pre_volsync; /* Volsync before op */
+ struct afs_volsync volsync; /* Volsync returned by op */
struct dentry *dentry; /* Dentry to be altered */
struct dentry *dentry_2; /* Second dentry to be altered */
struct timespec64 mtime; /* Modification time to record */
struct timespec64 ctime; /* Change time to set */
+ struct afs_error cumul_error; /* Cumulative error */
short nr_files; /* Number of entries in file[], more_files */
- short error;
unsigned int debug_id;
unsigned int cb_v_break; /* Volume break counter before op */
- unsigned int cb_s_break; /* Server break counter before op */
union {
struct {
@@ -848,13 +933,19 @@ struct afs_operation {
};
/* Fileserver iteration state */
- struct afs_addr_cursor ac;
struct afs_server_list *server_list; /* Current server list (pins ref) */
struct afs_server *server; /* Server we're using (ref pinned by server_list) */
+ struct afs_endpoint_state *estate; /* Current endpoint state (doesn't pin ref) */
+ struct afs_server_state *server_states; /* States of the servers involved */
struct afs_call *call;
- unsigned long untried; /* Bitmask of untried servers */
- short index; /* Current server */
- unsigned short nr_iterations; /* Number of server iterations */
+ unsigned long untried_servers; /* Bitmask of untried servers */
+ unsigned long addr_tried; /* Tried addresses */
+ s32 call_abort_code; /* Abort code from single call */
+ short call_error; /* Error from single call */
+ short server_index; /* Current server */
+ short nr_iterations; /* Number of server iterations */
+ signed char addr_index; /* Current address */
+ bool call_responded; /* T if the current address responded */
unsigned int flags;
#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
@@ -956,31 +1047,32 @@ static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
/*
* addr_list.c
*/
-static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
-{
- if (alist)
- refcount_inc(&alist->usage);
- return alist;
-}
-extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
- unsigned short,
- unsigned short);
-extern void afs_put_addrlist(struct afs_addr_list *);
+struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason);
+extern struct afs_addr_list *afs_alloc_addrlist(unsigned int nr);
+extern void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason);
extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
const char *, size_t, char,
unsigned short, unsigned short);
+bool afs_addr_list_same(const struct afs_addr_list *a,
+ const struct afs_addr_list *b);
extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
-extern bool afs_iterate_addresses(struct afs_addr_cursor *);
-extern int afs_end_cursor(struct afs_addr_cursor *);
-extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
-extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
+extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr,
+ __be32 xdr, u16 port);
+extern int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *addr,
+ __be32 *xdr, u16 port);
+
+/*
+ * addr_prefs.c
+ */
+int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size);
+void afs_get_address_preferences_rcu(struct afs_net *net, struct afs_addr_list *alist);
+void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alist);
/*
* callback.c
*/
extern void afs_invalidate_mmap_work(struct work_struct *);
-extern void afs_server_init_callback_work(struct work_struct *work);
extern void afs_init_callback_state(struct afs_server *);
extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
@@ -988,13 +1080,15 @@ extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback
static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
{
- return vnode->cb_break + vnode->cb_v_break;
+ return vnode->cb_break + vnode->cb_ro_snapshot + vnode->cb_scrub;
}
static inline bool afs_cb_is_broken(unsigned int cb_break,
const struct afs_vnode *vnode)
{
- return cb_break != (vnode->cb_break + vnode->volume->cb_v_break);
+ return cb_break != (vnode->cb_break +
+ atomic_read(&vnode->volume->cb_ro_snapshot) +
+ atomic_read(&vnode->volume->cb_scrub));
}
/*
@@ -1110,10 +1204,11 @@ extern void afs_fs_get_volume_status(struct afs_operation *);
extern void afs_fs_set_lock(struct afs_operation *);
extern void afs_fs_extend_lock(struct afs_operation *);
extern void afs_fs_release_lock(struct afs_operation *);
-extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
- struct afs_addr_cursor *, struct key *);
-extern bool afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
- struct afs_addr_cursor *, struct key *);
+int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
+ struct afs_address *addr, struct key *key);
+bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
+ struct afs_endpoint_state *estate, unsigned int addr_index,
+ struct key *key);
extern void afs_fs_inline_bulk_status(struct afs_operation *);
struct afs_acl {
@@ -1133,11 +1228,6 @@ extern bool afs_begin_vnode_operation(struct afs_operation *);
extern void afs_wait_for_operation(struct afs_operation *);
extern int afs_do_sync_operation(struct afs_operation *);
-static inline void afs_op_nomem(struct afs_operation *op)
-{
- op->error = -ENOMEM;
-}
-
static inline void afs_op_set_vnode(struct afs_operation *op, unsigned int n,
struct afs_vnode *vnode)
{
@@ -1154,12 +1244,17 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
/*
* fs_probe.c
*/
+struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
+ enum afs_estate_trace where);
+void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where);
extern void afs_fileserver_probe_result(struct afs_call *);
-extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool);
-extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+ struct afs_addr_list *new_addrs, struct key *key);
+int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr);
extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
extern void afs_fs_probe_dispatcher(struct work_struct *);
-extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
+int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
+ unsigned long exclude, bool is_intr);
extern void afs_fs_probe_cleanup(struct afs_net *);
/*
@@ -1173,9 +1268,6 @@ extern int afs_ilookup5_test_by_fid(struct inode *, void *);
extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
extern struct inode *afs_root_iget(struct super_block *, struct key *);
-extern bool afs_check_validity(struct afs_vnode *);
-extern int afs_validate(struct afs_vnode *, struct key *);
-bool afs_pagecache_valid(struct afs_vnode *);
extern int afs_getattr(struct mnt_idmap *idmap, const struct path *,
struct kstat *, u32, unsigned int);
extern int afs_setattr(struct mnt_idmap *idmap, struct dentry *, struct iattr *);
@@ -1231,6 +1323,31 @@ static inline void __afs_stat(atomic_t *s)
extern int afs_abort_to_error(u32);
extern void afs_prioritise_error(struct afs_error *, int, u32);
+static inline void afs_op_nomem(struct afs_operation *op)
+{
+ op->cumul_error.error = -ENOMEM;
+}
+
+static inline int afs_op_error(const struct afs_operation *op)
+{
+ return op->cumul_error.error;
+}
+
+static inline s32 afs_op_abort_code(const struct afs_operation *op)
+{
+ return op->cumul_error.abort_code;
+}
+
+static inline int afs_op_set_error(struct afs_operation *op, int error)
+{
+ return op->cumul_error.error = error;
+}
+
+static inline void afs_op_accumulate_error(struct afs_operation *op, int error, s32 abort_code)
+{
+ afs_prioritise_error(&op->cumul_error, error, abort_code);
+}
+
/*
* mntpt.c
*/
@@ -1261,6 +1378,7 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {}
/*
* rotate.c
*/
+void afs_clear_server_states(struct afs_operation *op);
extern bool afs_select_fileserver(struct afs_operation *);
extern void afs_dump_edestaddrreq(const struct afs_operation *);
@@ -1273,8 +1391,8 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
-extern void afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t);
-extern long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
+void afs_make_call(struct afs_call *call, gfp_t gfp);
+void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
size_t, size_t);
@@ -1287,12 +1405,16 @@ extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
gfp_t gfp)
{
- op->call = call;
- op->type = call->type;
- call->op = op;
- call->key = op->key;
- call->intr = !(op->flags & AFS_OPERATION_UNINTR);
- afs_make_call(&op->ac, call, gfp);
+ struct afs_addr_list *alist = op->estate->addresses;
+
+ op->call = call;
+ op->type = call->type;
+ call->op = op;
+ call->key = op->key;
+ call->intr = !(op->flags & AFS_OPERATION_UNINTR);
+ call->peer = rxrpc_kernel_get_peer(alist->addrs[op->addr_index].peer);
+ call->service_id = op->server->service_id;
+ afs_make_call(call, gfp);
}
static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
@@ -1401,8 +1523,7 @@ extern void __exit afs_clean_up_permit_cache(void);
*/
extern spinlock_t afs_server_peer_lock;
-extern struct afs_server *afs_find_server(struct afs_net *,
- const struct sockaddr_rxrpc *);
+extern struct afs_server *afs_find_server(struct afs_net *, const struct rxrpc_peer *);
extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32);
extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
@@ -1414,7 +1535,7 @@ extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
extern void afs_fs_probe_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_check_server_record(struct afs_operation *, struct afs_server *);
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key);
static inline void afs_inc_servers_outstanding(struct afs_net *net)
{
@@ -1442,10 +1563,14 @@ static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list
}
extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
-extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
- struct afs_vldb_entry *,
- u8);
+struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
+ struct key *key,
+ struct afs_vldb_entry *vldb);
extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
+void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist);
+void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist,
+ struct afs_server_list *old);
+void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist);
/*
* super.c
@@ -1454,13 +1579,24 @@ extern int __init afs_fs_init(void);
extern void afs_fs_exit(void);
/*
+ * validation.c
+ */
+bool afs_check_validity(const struct afs_vnode *vnode);
+int afs_update_volume_state(struct afs_operation *op);
+int afs_validate(struct afs_vnode *vnode, struct key *key);
+
+/*
* vlclient.c
*/
extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
-extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *,
- struct key *, struct afs_vlserver *, unsigned int);
+struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
+ struct afs_addr_list *alist,
+ unsigned int addr_index,
+ struct key *key,
+ struct afs_vlserver *server,
+ unsigned int server_index);
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *);
@@ -1516,7 +1652,7 @@ extern int afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
-extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
+void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason);
extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
/*
@@ -1603,7 +1739,7 @@ static inline void afs_update_dentry_version(struct afs_operation *op,
struct afs_vnode_param *dir_vp,
struct dentry *dentry)
{
- if (!op->error)
+ if (!op->cumul_error.error)
dentry->d_fsdata =
(void *)(unsigned long)dir_vp->scb.status.data_version;
}
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 6425c81d07de..1b3bd21c168a 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -156,6 +156,7 @@ static void __net_exit afs_net_exit(struct net *net_ns)
afs_close_socket(net);
afs_proc_cleanup(net);
afs_put_sysnames(net->sysnames);
+ kfree_rcu(rcu_access_pointer(net->address_prefs), rcu);
}
static struct pernet_operations afs_net_ops = {
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 805328ca5428..b8180bf2281f 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -116,6 +116,8 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
{
switch (error) {
case 0:
+ e->aborted = false;
+ e->error = 0;
return;
default:
if (e->error == -ETIMEDOUT ||
@@ -161,12 +163,16 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
if (e->responded)
return;
e->error = error;
+ e->aborted = false;
return;
case -ECONNABORTED:
- error = afs_abort_to_error(abort_code);
- fallthrough;
+ e->error = afs_abort_to_error(abort_code);
+ e->aborted = true;
+ e->responded = true;
+ return;
case -ENETRESET: /* Responded, but we seem to have changed address */
+ e->aborted = false;
e->responded = true;
e->error = error;
return;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 2a0c83d71565..3bd02571f30d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -147,6 +147,55 @@ inval:
}
/*
+ * Display the list of addr_prefs known to the namespace.
+ */
+static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
+{
+	struct afs_addr_preference_list *preflist;
+	struct afs_addr_preference *pref;
+	struct afs_net *net = afs_seq2net_single(m);
+	union {
+		struct sockaddr_in sin;
+		struct sockaddr_in6 sin6;
+	} addr;
+	unsigned int i;
+	char buf[44]; /* Maximum ipv6 + max subnet is 43 */
+
+	rcu_read_lock(); /* Held across every access to preflist below */
+	preflist = rcu_dereference(net->address_prefs);
+
+	if (!preflist) {
+		seq_puts(m, "NO PREFS\n");
+		goto out; /* Must still drop the RCU read lock */
+	}
+
+	seq_printf(m, "PROT SUBNET                                      PRIOR (v=%u n=%u/%u/%u)\n",
+		   preflist->version, preflist->ipv6_off, preflist->nr, preflist->max_prefs);
+
+	memset(&addr, 0, sizeof(addr));
+
+	for (i = 0; i < preflist->nr; i++) {
+		pref = &preflist->prefs[i];
+
+		addr.sin.sin_family = pref->family;
+		if (pref->family == AF_INET) {
+			memcpy(&addr.sin.sin_addr, &pref->ipv4_addr,
+			       sizeof(addr.sin.sin_addr));
+			snprintf(buf, sizeof(buf), "%pISc/%u", &addr.sin, pref->subnet_mask);
+			seq_printf(m, "UDP  %-43.43s %5u\n", buf, pref->prio);
+		} else {
+			memcpy(&addr.sin6.sin6_addr, &pref->ipv6_addr,
+			       sizeof(addr.sin6.sin6_addr));
+			snprintf(buf, sizeof(buf), "%pISc/%u", &addr.sin6, pref->subnet_mask);
+			seq_printf(m, "UDP  %-43.43s %5u\n", buf, pref->prio);
+		}
+	}
+out:
+	rcu_read_unlock(); /* Pairs with the rcu_read_lock() above on all exit paths */
+	return 0;
+}
+
+/*
* Display the name of the current workstation cell.
*/
static int afs_proc_rootcell_show(struct seq_file *m, void *v)
@@ -307,7 +356,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
for (i = 0; i < alist->nr_addrs; i++)
seq_printf(m, " %c %pISpc\n",
alist->preferred == i ? '>' : '-',
- &alist->addrs[i].transport);
+ rxrpc_kernel_remote_addr(alist->addrs[i].peer));
}
seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt);
seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n",
@@ -375,32 +424,45 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = {
*/
static int afs_proc_servers_show(struct seq_file *m, void *v)
{
- struct afs_server *server;
+ struct afs_endpoint_state *estate;
struct afs_addr_list *alist;
+ struct afs_server *server;
+ unsigned long failed;
int i;
if (v == SEQ_START_TOKEN) {
- seq_puts(m, "UUID REF ACT\n");
+ seq_puts(m, "UUID REF ACT CELL\n");
return 0;
}
server = list_entry(v, struct afs_server, proc_link);
- alist = rcu_dereference(server->addresses);
- seq_printf(m, "%pU %3d %3d\n",
+ estate = rcu_dereference(server->endpoint_state);
+ alist = estate->addresses;
+ seq_printf(m, "%pU %3d %3d %s\n",
&server->uuid,
refcount_read(&server->ref),
- atomic_read(&server->active));
- seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n",
- server->flags, server->rtt, server->cb_s_break);
- seq_printf(m, " - probe: last=%d out=%d\n",
- (int)(jiffies - server->probed_at) / HZ,
- atomic_read(&server->probe_outstanding));
- seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n",
- alist->version, alist->responded, alist->failed);
- for (i = 0; i < alist->nr_addrs; i++)
- seq_printf(m, " [%x] %pISpc%s\n",
- i, &alist->addrs[i].transport,
- alist->preferred == i ? "*" : "");
+ atomic_read(&server->active),
+ server->cell->name);
+ seq_printf(m, " - info: fl=%lx rtt=%u\n",
+ server->flags, server->rtt);
+ seq_printf(m, " - probe: last=%d\n",
+ (int)(jiffies - server->probed_at) / HZ);
+ failed = estate->failed_set;
+ seq_printf(m, " - ESTATE pq=%x np=%u rsp=%lx f=%lx\n",
+ estate->probe_seq, atomic_read(&estate->nr_probing),
+ estate->responsive_set, estate->failed_set);
+ seq_printf(m, " - ALIST v=%u ap=%u\n",
+ alist->version, alist->addr_pref_version);
+ for (i = 0; i < alist->nr_addrs; i++) {
+ const struct afs_address *addr = &alist->addrs[i];
+
+ seq_printf(m, " [%x] %pISpc%s rtt=%d err=%d p=%u\n",
+ i, rxrpc_kernel_remote_addr(addr->peer),
+ alist->preferred == i ? "*" :
+ test_bit(i, &failed) ? "!" : "",
+ rxrpc_kernel_get_srtt(addr->peer),
+ addr->last_error, addr->prio);
+ }
return 0;
}
@@ -681,7 +743,11 @@ int afs_proc_init(struct afs_net *net)
&afs_proc_sysname_ops,
afs_proc_sysname_write,
sizeof(struct seq_net_private),
- NULL))
+ NULL) ||
+ !proc_create_net_single_write("addr_prefs", 0644, p,
+ afs_proc_addr_prefs_show,
+ afs_proc_addr_prefs_write,
+ NULL))
goto error_tree;
net->proc_afs = p;
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index a840c3588ebb..700a27bc8c25 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -13,6 +13,19 @@
#include <linux/sched/signal.h>
#include "internal.h"
#include "afs_fs.h"
+#include "protocol_uae.h"
+
+void afs_clear_server_states(struct afs_operation *op)
+{
+ unsigned int i;
+
+ if (op->server_states) {
+ for (i = 0; i < op->server_list->nr_servers; i++)
+ afs_put_endpoint_state(op->server_states[i].endpoint_state,
+ afs_estate_trace_put_server_state);
+ kfree(op->server_states);
+ }
+}
/*
* Begin iteration through a server list, starting with the vnode's last used
@@ -25,14 +38,41 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
void *cb_server;
int i;
+ trace_afs_rotate(op, afs_rotate_trace_start, 0);
+
read_lock(&op->volume->servers_lock);
op->server_list = afs_get_serverlist(
rcu_dereference_protected(op->volume->servers,
lockdep_is_held(&op->volume->servers_lock)));
read_unlock(&op->volume->servers_lock);
- op->untried = (1UL << op->server_list->nr_servers) - 1;
- op->index = READ_ONCE(op->server_list->preferred);
+ op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
+ GFP_KERNEL);
+ if (!op->server_states) {
+ afs_op_nomem(op);
+ trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
+ return false;
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < op->server_list->nr_servers; i++) {
+ struct afs_endpoint_state *estate;
+ struct afs_server_state *s = &op->server_states[i];
+
+ server = op->server_list->servers[i].server;
+ estate = rcu_dereference(server->endpoint_state);
+ s->endpoint_state = afs_get_endpoint_state(estate,
+ afs_estate_trace_get_server_state);
+ s->probe_seq = estate->probe_seq;
+ s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
+ init_waitqueue_entry(&s->probe_waiter, current);
+ afs_get_address_preferences(op->net, estate->addresses);
+ }
+ rcu_read_unlock();
+
+
+ op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
+ op->server_index = -1;
cb_server = vnode->cb_server;
if (cb_server) {
@@ -40,7 +80,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
for (i = 0; i < op->server_list->nr_servers; i++) {
server = op->server_list->servers[i].server;
if (server == cb_server) {
- op->index = i;
+ op->server_index = i;
goto found_interest;
}
}
@@ -50,7 +90,8 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
* and have to return an error.
*/
if (op->flags & AFS_OPERATION_CUR_ONLY) {
- op->error = -ESTALE;
+ afs_op_set_error(op, -ESTALE);
+ trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
return false;
}
@@ -58,7 +99,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
write_seqlock(&vnode->cb_lock);
ASSERTCMP(cb_server, ==, vnode->cb_server);
vnode->cb_server = NULL;
- if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
vnode->cb_break++;
write_sequnlock(&vnode->cb_lock);
}
@@ -70,7 +111,7 @@ found_interest:
/*
* Post volume busy note.
*/
-static void afs_busy(struct afs_volume *volume, u32 abort_code)
+static void afs_busy(struct afs_operation *op, u32 abort_code)
{
const char *m;
@@ -81,7 +122,8 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
default: m = "busy"; break;
}
- pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
+ pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
+ op->volume->vid, op->volume->name, &op->server->uuid, m);
}
/*
@@ -89,10 +131,11 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
*/
static bool afs_sleep_and_retry(struct afs_operation *op)
{
+ trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
if (!(op->flags & AFS_OPERATION_UNINTR)) {
msleep_interruptible(1000);
if (signal_pending(current)) {
- op->error = -ERESTARTSYS;
+ afs_op_set_error(op, -ERESTARTSYS);
return false;
}
} else {
@@ -111,62 +154,105 @@ bool afs_select_fileserver(struct afs_operation *op)
struct afs_addr_list *alist;
struct afs_server *server;
struct afs_vnode *vnode = op->file[0].vnode;
- struct afs_error e;
- u32 rtt;
- int error = op->ac.error, i;
+ unsigned long set, failed;
+ s32 abort_code = op->call_abort_code;
+ int best_prio = 0;
+ int error = op->call_error, addr_index, i, j;
+
+ op->nr_iterations++;
- _enter("%lx[%d],%lx[%d],%d,%d",
- op->untried, op->index,
- op->ac.tried, op->ac.index,
- error, op->ac.abort_code);
+ _enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
+ op->debug_id, op->nr_iterations, op->volume->vid,
+ op->server_index, op->untried_servers,
+ op->addr_index, op->addr_tried,
+ error, abort_code);
if (op->flags & AFS_OPERATION_STOP) {
+ trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
_leave(" = f [stopped]");
return false;
}
- op->nr_iterations++;
-
- /* Evaluate the result of the previous operation, if there was one. */
- switch (error) {
- case SHRT_MAX:
+ if (op->nr_iterations == 0)
goto start;
+ WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
+ trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
+
+ /* Evaluate the result of the previous operation, if there was one. */
+ switch (op->call_error) {
case 0:
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
+ op->cumul_error.responded = true;
+
+ /* We succeeded, but we may need to redo the op from another
+ * server if we're looking at a set of RO volumes where some of
+ * the servers have not yet been brought up to date lest we
+ * regress the data. We only switch to the new version once
+ * >=50% of the servers are updated.
+ */
+ error = afs_update_volume_state(op);
+ if (error != 0) {
+ if (error == 1) {
+ afs_sleep_and_retry(op);
+ goto restart_from_beginning;
+ }
+ afs_op_set_error(op, error);
+ goto failed;
+ }
+ fallthrough;
default:
/* Success or local failure. Stop. */
- op->error = error;
+ afs_op_set_error(op, error);
op->flags |= AFS_OPERATION_STOP;
+ trace_afs_rotate(op, afs_rotate_trace_stop, error);
_leave(" = f [okay/local %d]", error);
return false;
case -ECONNABORTED:
/* The far side rejected the operation on some grounds. This
* might involve the server being busy or the volume having been moved.
+ *
+ * Note that various V* errors should not be sent to a cache manager
+ * by a fileserver as they should be translated to more modern UAE*
+ * errors instead. IBM AFS and OpenAFS fileservers, however, do leak
+ * these abort codes.
*/
- switch (op->ac.abort_code) {
+ trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
+ op->cumul_error.responded = true;
+ switch (abort_code) {
case VNOVOL:
/* This fileserver doesn't know about the volume.
* - May indicate that the VL is wrong - retry once and compare
* the results.
* - May indicate that the fileserver couldn't attach to the vol.
+ * - The volume might have been temporarily removed so that it can
+ * be replaced by a volume restore. "vos" might have ended one
+ * transaction and has yet to create the next.
+ * - The volume might not be blessed or might not be in-service
+ * (administrative action).
*/
if (op->flags & AFS_OPERATION_VNOVOL) {
- op->error = -EREMOTEIO;
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
goto next_server;
}
write_lock(&op->volume->servers_lock);
- op->server_list->vnovol_mask |= 1 << op->index;
+ op->server_list->vnovol_mask |= 1 << op->server_index;
write_unlock(&op->volume->servers_lock);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
error = afs_check_volume_status(op->volume, op);
- if (error < 0)
- goto failed_set_error;
+ if (error < 0) {
+ afs_op_set_error(op, error);
+ goto failed;
+ }
if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
- op->error = -ENOMEDIUM;
+ afs_op_set_error(op, -ENOMEDIUM);
goto failed;
}
@@ -174,7 +260,7 @@ bool afs_select_fileserver(struct afs_operation *op)
* it's the fileserver having trouble.
*/
if (rcu_access_pointer(op->volume->servers) == op->server_list) {
- op->error = -EREMOTEIO;
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
goto next_server;
}
@@ -183,50 +269,99 @@ bool afs_select_fileserver(struct afs_operation *op)
_leave(" = t [vnovol]");
return true;
- case VSALVAGE: /* TODO: Should this return an error or iterate? */
case VVOLEXISTS:
- case VNOSERVICE:
case VONLINE:
- case VDISKFULL:
- case VOVERQUOTA:
- op->error = afs_abort_to_error(op->ac.abort_code);
+ /* These should not be returned from the fileserver. */
+ pr_warn("Fileserver returned unexpected abort %d\n",
+ abort_code);
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
goto next_server;
+ case VNOSERVICE:
+ /* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
+ * if the volume was neither in-service nor administratively
+ * blessed. All usage was replaced by VNOVOL because AFS 3.1 and
+ * earlier cache managers did not handle VNOSERVICE and assumed
+ * it was the client OS's errno 105.
+ *
+ * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
+ * fileserver idle dead time error which was sent in place of
+ * RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
+ * fileserver took too long to send a reply to the client.
+ * RX_CALL_TIMEOUT would have caused the cache manager to mark the
+ * server down whereas VNOSERVICE since AFS 3.2 would cause the cache
+ * manager to temporarily (up to 15 minutes) mark the volume
+ * instance as unusable.
+ *
+ * The idle dead logic resulted in cache inconsistency since a
+ * state changing call that the cache manager assumed was dead
+ * could still be processed to completion by the fileserver. This
+ * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
+ * returned. However, many 1.4.8 through 1.6.24 fileservers are
+ * still in existence.
+ *
+ * AuriStorFS fileservers have never returned VNOSERVICE.
+ *
+ * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
+ */
+ case RX_CALL_TIMEOUT:
+ afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
+ goto next_server;
+
+ case VSALVAGING: /* This error should not be leaked to cache managers
+ * but is from OpenAFS demand attach fileservers.
+ * It should be treated as an alias for VOFFLINE.
+ */
+ case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
case VOFFLINE:
- if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
- afs_busy(op->volume, op->ac.abort_code);
- clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+ /* The volume is in use by the volserver or another volume utility
+ * for an operation that might alter the contents. The volume is
+ * expected to come back but it might take a long time (could be
+ * days).
+ */
+ if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags)) {
+ afs_busy(op, abort_code);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
}
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
- op->error = -EADV;
- goto failed;
- }
- if (op->flags & AFS_OPERATION_CUR_ONLY) {
- op->error = -ESTALE;
+ afs_op_set_error(op, -EADV);
goto failed;
}
goto busy;
- case VSALVAGING:
- case VRESTARTING:
+ case VRESTARTING: /* The fileserver is either shutting down or starting up. */
case VBUSY:
- /* Retry after going round all the servers unless we
- * have a file lock we need to maintain.
+ /* The volume is in use by the volserver or another volume
+ * utility for an operation that is not expected to alter the
+ * contents of the volume. VBUSY does not need to be returned
+ * for a ROVOL or BACKVOL bound to an ITBusy volserver
+ * transaction. The fileserver is permitted to continue serving
+ * content from ROVOLs and BACKVOLs during an ITBusy transaction
+ * because the content will not change. However, many fileserver
+ * releases do return VBUSY for ROVOL and BACKVOL instances under
+ * many circumstances.
+ *
+ * Retry after going round all the servers unless we have a file
+ * lock we need to maintain.
*/
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
- op->error = -EBUSY;
+ afs_op_set_error(op, -EBUSY);
goto failed;
}
- if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
- afs_busy(op->volume, op->ac.abort_code);
- clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
+ if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags)) {
+ afs_busy(op, abort_code);
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
}
busy:
if (op->flags & AFS_OPERATION_CUR_ONLY) {
if (!afs_sleep_and_retry(op))
goto failed;
- /* Retry with same server & address */
+ /* Retry with same server & address */
_leave(" = t [vbusy]");
return true;
}
@@ -243,7 +378,7 @@ bool afs_select_fileserver(struct afs_operation *op)
* honour, just in case someone sets up a loop.
*/
if (op->flags & AFS_OPERATION_VMOVED) {
- op->error = -EREMOTEIO;
+ afs_op_set_error(op, -EREMOTEIO);
goto failed;
}
op->flags |= AFS_OPERATION_VMOVED;
@@ -251,8 +386,10 @@ bool afs_select_fileserver(struct afs_operation *op)
set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
error = afs_check_volume_status(op->volume, op);
- if (error < 0)
- goto failed_set_error;
+ if (error < 0) {
+ afs_op_set_error(op, error);
+ goto failed;
+ }
/* If the server list didn't change, then the VLDB is
* out of sync with the fileservers. This is hopefully
@@ -264,22 +401,50 @@ bool afs_select_fileserver(struct afs_operation *op)
* TODO: Retry a few times with sleeps.
*/
if (rcu_access_pointer(op->volume->servers) == op->server_list) {
- op->error = -ENOMEDIUM;
+ afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
goto failed;
}
goto restart_from_beginning;
+ case UAEIO:
+ case VIO:
+ afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
+ if (op->volume->type != AFSVL_RWVOL)
+ goto next_server;
+ goto failed;
+
+ case VDISKFULL:
+ case UAENOSPC:
+ /* The partition is full. Only applies to RWVOLs.
+ * Translate locally and return ENOSPC.
+ * No replicas to failover to.
+ */
+ afs_op_set_error(op, -ENOSPC);
+ goto failed_but_online;
+
+ case VOVERQUOTA:
+ case UAEDQUOT:
+ /* Volume is full. Only applies to RWVOLs.
+ * Translate locally and return EDQUOT.
+ * No replicas to failover to.
+ */
+ afs_op_set_error(op, -EDQUOT);
+ goto failed_but_online;
+
default:
- clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
- clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
- op->error = afs_abort_to_error(op->ac.abort_code);
+ afs_op_accumulate_error(op, error, abort_code);
+ failed_but_online:
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
goto failed;
}
case -ETIMEDOUT:
case -ETIME:
- if (op->error != -EDESTADDRREQ)
+ if (afs_op_error(op) != -EDESTADDRREQ)
goto iterate_address;
fallthrough;
case -ERFKILL:
@@ -289,7 +454,7 @@ bool afs_select_fileserver(struct afs_operation *op)
case -EHOSTDOWN:
case -ECONNREFUSED:
_debug("no conn");
- op->error = error;
+ afs_op_accumulate_error(op, error, 0);
goto iterate_address;
case -ENETRESET:
@@ -298,24 +463,31 @@ bool afs_select_fileserver(struct afs_operation *op)
fallthrough;
case -ECONNRESET:
_debug("call reset");
- op->error = error;
+ afs_op_set_error(op, error);
goto failed;
}
restart_from_beginning:
+ trace_afs_rotate(op, afs_rotate_trace_restart, 0);
_debug("restart");
- afs_end_cursor(&op->ac);
+ op->estate = NULL;
op->server = NULL;
+ afs_clear_server_states(op);
+ op->server_states = NULL;
afs_put_serverlist(op->net, op->server_list);
op->server_list = NULL;
start:
_debug("start");
+ ASSERTCMP(op->estate, ==, NULL);
/* See if we need to do an update of the volume record. Note that the
* volume may have moved or even have been deleted.
*/
error = afs_check_volume_status(op->volume, op);
- if (error < 0)
- goto failed_set_error;
+ trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
+ if (error < 0) {
+ afs_op_set_error(op, error);
+ goto failed;
+ }
if (!afs_start_fs_iteration(op, vnode))
goto failed;
@@ -323,52 +495,83 @@ start:
_debug("__ VOL %llx __", op->volume->vid);
pick_server:
- _debug("pick [%lx]", op->untried);
+ _debug("pick [%lx]", op->untried_servers);
+ ASSERTCMP(op->estate, ==, NULL);
- error = afs_wait_for_fs_probes(op->server_list, op->untried);
- if (error < 0)
- goto failed_set_error;
+ error = afs_wait_for_fs_probes(op, op->server_states,
+ !(op->flags & AFS_OPERATION_UNINTR));
+ switch (error) {
+ case 0: /* No untried responsive servers and no outstanding probes */
+ trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
+ goto no_more_servers;
+ case 1: /* Got a response */
+ trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
+ break;
+ case 2: /* Probe data superseded */
+ trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
+ goto restart_from_beginning;
+ default:
+ trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
+ afs_op_set_error(op, error);
+ goto failed;
+ }
- /* Pick the untried server with the lowest RTT. If we have outstanding
- * callbacks, we stick with the server we're already using if we can.
+ /* Pick the untried server with the highest priority untried endpoint.
+ * If we have outstanding callbacks, we stick with the server we're
+ * already using if we can.
*/
if (op->server) {
- _debug("server %u", op->index);
- if (test_bit(op->index, &op->untried))
+ _debug("server %u", op->server_index);
+ if (test_bit(op->server_index, &op->untried_servers))
goto selected_server;
op->server = NULL;
_debug("no server");
}
- op->index = -1;
- rtt = U32_MAX;
+ rcu_read_lock();
+ op->server_index = -1;
+ best_prio = -1;
for (i = 0; i < op->server_list->nr_servers; i++) {
- struct afs_server *s = op->server_list->servers[i].server;
+ struct afs_endpoint_state *es;
+ struct afs_server_entry *se = &op->server_list->servers[i];
+ struct afs_addr_list *sal;
+ struct afs_server *s = se->server;
- if (!test_bit(i, &op->untried) ||
+ if (!test_bit(i, &op->untried_servers) ||
+ test_bit(AFS_SE_EXCLUDED, &se->flags) ||
!test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
continue;
- if (s->probe.rtt < rtt) {
- op->index = i;
- rtt = s->probe.rtt;
+ es = op->server_states->endpoint_state;
+ sal = es->addresses;
+
+ afs_get_address_preferences_rcu(op->net, sal);
+ for (j = 0; j < sal->nr_addrs; j++) {
+ if (!sal->addrs[j].peer)
+ continue;
+ if (sal->addrs[j].prio > best_prio) {
+ op->server_index = i;
+ best_prio = sal->addrs[j].prio;
+ }
}
}
+ rcu_read_unlock();
- if (op->index == -1)
+ if (op->server_index == -1)
goto no_more_servers;
selected_server:
- _debug("use %d", op->index);
- __clear_bit(op->index, &op->untried);
+ trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
+ _debug("use %d prio %u", op->server_index, best_prio);
+ __clear_bit(op->server_index, &op->untried_servers);
/* We're starting on a different fileserver from the list. We need to
* check it, create a callback intercept, find its address list and
* probe its capabilities before we use it.
*/
- ASSERTCMP(op->ac.alist, ==, NULL);
- server = op->server_list->servers[op->index].server;
+ ASSERTCMP(op->estate, ==, NULL);
+ server = op->server_list->servers[op->server_index].server;
- if (!afs_check_server_record(op, server))
+ if (!afs_check_server_record(op, server, op->key))
goto failed;
_debug("USING SERVER: %pU", &server->uuid);
@@ -377,58 +580,73 @@ selected_server:
op->server = server;
if (vnode->cb_server != server) {
vnode->cb_server = server;
- vnode->cb_s_break = server->cb_s_break;
- vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
- vnode->cb_v_break = vnode->volume->cb_v_break;
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
+ atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
}
- read_lock(&server->fs_lock);
- alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->fs_lock));
- afs_get_addrlist(alist);
- read_unlock(&server->fs_lock);
-
retry_server:
- memset(&op->ac, 0, sizeof(op->ac));
-
- if (!op->ac.alist)
- op->ac.alist = alist;
- else
- afs_put_addrlist(alist);
-
- op->ac.index = -1;
+ op->addr_tried = 0;
+ op->addr_index = -1;
iterate_address:
- ASSERT(op->ac.alist);
/* Iterate over the current server's address list to try and find an
* address on which it will respond to us.
*/
- if (!afs_iterate_addresses(&op->ac))
- goto out_of_addresses;
+ op->estate = op->server_states[op->server_index].endpoint_state;
+ set = READ_ONCE(op->estate->responsive_set);
+ failed = READ_ONCE(op->estate->failed_set);
+ _debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
+ set &= ~(failed | op->addr_tried);
+ trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
+ if (!set)
+ goto wait_for_more_probe_results;
+
+ alist = op->estate->addresses;
+ for (i = 0; i < alist->nr_addrs; i++) {
+ if (alist->addrs[i].prio > best_prio) {
+ addr_index = i;
+ best_prio = alist->addrs[i].prio;
+ }
+ }
- _debug("address [%u] %u/%u %pISp",
- op->index, op->ac.index, op->ac.alist->nr_addrs,
- &op->ac.alist->addrs[op->ac.index].transport);
+ addr_index = READ_ONCE(alist->preferred);
+ if (!test_bit(addr_index, &set))
+ addr_index = __ffs(set);
+ op->addr_index = addr_index;
+ set_bit(addr_index, &op->addr_tried);
+
+ op->volsync.creation = TIME64_MIN;
+ op->volsync.update = TIME64_MIN;
+ op->call_responded = false;
+ _debug("address [%u] %u/%u %pISp",
+ op->server_index, addr_index, alist->nr_addrs,
+ rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
_leave(" = t");
return true;
-out_of_addresses:
+wait_for_more_probe_results:
+ error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
+ !(op->flags & AFS_OPERATION_UNINTR));
+ if (!error)
+ goto iterate_address;
+
/* We've now had a failure to respond on all of a server's addresses -
* immediately probe them again and consider retrying the server.
*/
+ trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
afs_probe_fileserver(op->net, op->server);
if (op->flags & AFS_OPERATION_RETRY_SERVER) {
- alist = op->ac.alist;
- error = afs_wait_for_one_fs_probe(
- op->server, !(op->flags & AFS_OPERATION_UNINTR));
+ error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
+ !(op->flags & AFS_OPERATION_UNINTR));
switch (error) {
case 0:
op->flags &= ~AFS_OPERATION_RETRY_SERVER;
+ trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
goto retry_server;
case -ERESTARTSYS:
- goto failed_set_error;
+ afs_op_set_error(op, error);
+ goto failed;
case -ETIME:
case -EDESTADDRREQ:
goto next_server;
@@ -436,34 +654,51 @@ out_of_addresses:
}
next_server:
+ trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
_debug("next");
- afs_end_cursor(&op->ac);
+ ASSERT(op->estate);
+ alist = op->estate->addresses;
+ if (op->call_responded &&
+ op->addr_index != READ_ONCE(alist->preferred) &&
+ test_bit(alist->preferred, &op->addr_tried))
+ WRITE_ONCE(alist->preferred, op->addr_index);
+ op->estate = NULL;
goto pick_server;
no_more_servers:
/* That's all the servers poked to no good effect. Try again if some
* of them were busy.
*/
- if (op->flags & AFS_OPERATION_VBUSY)
+ trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
+ if (op->flags & AFS_OPERATION_VBUSY) {
+ afs_sleep_and_retry(op);
+ op->flags &= ~AFS_OPERATION_VBUSY;
goto restart_from_beginning;
+ }
- e.error = -EDESTADDRREQ;
- e.responded = false;
+ rcu_read_lock();
for (i = 0; i < op->server_list->nr_servers; i++) {
- struct afs_server *s = op->server_list->servers[i].server;
+ struct afs_endpoint_state *estate;
- afs_prioritise_error(&e, READ_ONCE(s->probe.error),
- s->probe.abort_code);
+ estate = op->server_states->endpoint_state;
+ error = READ_ONCE(estate->error);
+ if (error < 0)
+ afs_op_accumulate_error(op, error, estate->abort_code);
}
+ rcu_read_unlock();
- error = e.error;
-
-failed_set_error:
- op->error = error;
failed:
+ trace_afs_rotate(op, afs_rotate_trace_failed, 0);
op->flags |= AFS_OPERATION_STOP;
- afs_end_cursor(&op->ac);
- _leave(" = f [failed %d]", op->error);
+ if (op->estate) {
+ alist = op->estate->addresses;
+ if (op->call_responded &&
+ op->addr_index != READ_ONCE(alist->preferred) &&
+ test_bit(alist->preferred, &op->addr_tried))
+ WRITE_ONCE(alist->preferred, op->addr_index);
+ op->estate = NULL;
+ }
+ _leave(" = f [failed %d]", afs_op_error(op));
return false;
}
@@ -482,37 +717,40 @@ void afs_dump_edestaddrreq(const struct afs_operation *op)
rcu_read_lock();
pr_notice("EDESTADDR occurred\n");
- pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
+ pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
op->file[0].cb_break_before,
- op->file[1].cb_break_before, op->flags, op->error);
- pr_notice("FC: ut=%lx ix=%d ni=%u\n",
- op->untried, op->index, op->nr_iterations);
+ op->file[1].cb_break_before, op->flags, op->cumul_error.error);
+ pr_notice("OP: ut=%lx ix=%d ni=%u\n",
+ op->untried_servers, op->server_index, op->nr_iterations);
+ pr_notice("OP: call er=%d ac=%d r=%u\n",
+ op->call_error, op->call_abort_code, op->call_responded);
if (op->server_list) {
const struct afs_server_list *sl = op->server_list;
- pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
- sl->nr_servers, sl->preferred, sl->vnovol_mask);
+
+ pr_notice("FC: SL nr=%u vnov=%hx\n",
+ sl->nr_servers, sl->vnovol_mask);
for (i = 0; i < sl->nr_servers; i++) {
const struct afs_server *s = sl->servers[i].server;
+ const struct afs_endpoint_state *e =
+ rcu_dereference(s->endpoint_state);
+ const struct afs_addr_list *a = e->addresses;
+
pr_notice("FC: server fl=%lx av=%u %pU\n",
s->flags, s->addr_version, &s->uuid);
- if (s->addresses) {
- const struct afs_addr_list *a =
- rcu_dereference(s->addresses);
+ pr_notice("FC: - pq=%x R=%lx F=%lx\n",
+ e->probe_seq, e->responsive_set, e->failed_set);
+ if (a) {
pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
a->version,
a->nr_ipv4, a->nr_addrs, a->max_addrs,
a->preferred);
- pr_notice("FC: - R=%lx F=%lx\n",
- a->responded, a->failed);
- if (a == op->ac.alist)
+ if (a == e->addresses)
pr_notice("FC: - current\n");
}
}
}
- pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
- op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
- op->ac.responded, op->ac.nr_iterations);
+ pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
rcu_read_unlock();
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d642d06a453b..c453428f3c8b 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -178,6 +178,8 @@ void afs_put_call(struct afs_call *call)
ASSERT(!work_pending(&call->async_work));
ASSERT(call->type->name != NULL);
+ rxrpc_kernel_put_peer(call->peer);
+
if (call->rxcall) {
rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
rxrpc_kernel_put_call(net->socket, call->rxcall);
@@ -187,7 +189,6 @@ void afs_put_call(struct afs_call *call)
call->type->destructor(call);
afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
- afs_put_addrlist(call->alist);
kfree(call->request);
trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
@@ -294,9 +295,8 @@ static void afs_notify_end_request_tx(struct sock *sock,
* Initiate a call and synchronously queue up the parameters for dispatch. Any
* error is stored into the call struct, which the caller must check for.
*/
-void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
+void afs_make_call(struct afs_call *call, gfp_t gfp)
{
- struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
@@ -304,7 +304,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
s64 tx_total_len;
int ret;
- _enter(",{%pISp},", &srx->transport);
+ _enter(",{%pISp+%u},", rxrpc_kernel_remote_addr(call->peer), call->service_id);
ASSERT(call->type != NULL);
ASSERT(call->type->name != NULL);
@@ -313,8 +313,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
call, call->type->name, key_serial(call->key),
atomic_read(&call->net->nr_outstanding_calls));
- call->addr_ix = ac->index;
- call->alist = afs_get_addrlist(ac->alist);
+ trace_afs_make_call(call);
/* Work out the length we're going to transmit. This is awkward for
* calls such as FS.StoreData where there's an extra injection of data
@@ -333,7 +332,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
}
/* create a call */
- rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
+ rxcall = rxrpc_kernel_begin_call(call->net->socket, call->peer, call->key,
(unsigned long)call,
tx_total_len,
call->max_lifespan,
@@ -341,6 +340,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
(call->async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter),
+ call->service_id,
call->upgrade,
(call->intr ? RXRPC_PREINTERRUPTIBLE :
RXRPC_UNINTERRUPTIBLE),
@@ -390,7 +390,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
/* Note that at this point, we may have received the reply or an abort
* - and an asynchronous call may already have completed.
*
- * afs_wait_for_call_to_complete(call, ac)
+ * afs_wait_for_call_to_complete(call)
* must be called to synchronously clean up.
*/
return;
@@ -406,8 +406,7 @@ error_do_abort:
rxrpc_kernel_recv_data(call->net->socket, rxcall,
&msg.msg_iter, &len, false,
&call->abort_code, &call->service_id);
- ac->abort_code = call->abort_code;
- ac->responded = true;
+ call->responded = true;
}
call->error = ret;
trace_afs_call_done(call);
@@ -427,7 +426,7 @@ error_kill_call:
afs_set_call_complete(call, ret, 0);
}
- ac->error = ret;
+ call->error = ret;
call->state = AFS_CALL_COMPLETE;
_leave(" = %d", ret);
}
@@ -461,7 +460,7 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort)
max = m + 1;
pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n",
msg, call->type->name,
- &call->alist->addrs[call->addr_ix].transport);
+ rxrpc_kernel_remote_addr(call->peer));
}
}
@@ -508,6 +507,7 @@ static void afs_deliver_to_call(struct afs_call *call)
ret = -EBADMSG;
switch (ret) {
case 0:
+ call->responded = true;
afs_queue_call_work(call);
if (state == AFS_CALL_CL_PROC_REPLY) {
if (call->op)
@@ -522,9 +522,11 @@ static void afs_deliver_to_call(struct afs_call *call)
goto out;
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+ call->responded = true;
afs_log_error(call, call->abort_code);
goto done;
case -ENOTSUPP:
+ call->responded = true;
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret,
@@ -571,50 +573,46 @@ call_complete:
}
/*
- * Wait synchronously for a call to complete and clean up the call struct.
+ * Wait synchronously for a call to complete.
*/
-long afs_wait_for_call_to_complete(struct afs_call *call,
- struct afs_addr_cursor *ac)
+void afs_wait_for_call_to_complete(struct afs_call *call)
{
- long ret;
bool rxrpc_complete = false;
- DECLARE_WAITQUEUE(myself, current);
-
_enter("");
- ret = call->error;
- if (ret < 0)
- goto out;
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
+ DECLARE_WAITQUEUE(myself, current);
+
+ add_wait_queue(&call->waitq, &myself);
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /* deliver any messages that are in the queue */
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
+ call->need_attention) {
+ call->need_attention = false;
+ __set_current_state(TASK_RUNNING);
+ afs_deliver_to_call(call);
+ continue;
+ }
- add_wait_queue(&call->waitq, &myself);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- /* deliver any messages that are in the queue */
- if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
- call->need_attention) {
- call->need_attention = false;
- __set_current_state(TASK_RUNNING);
- afs_deliver_to_call(call);
- continue;
- }
+ if (afs_check_call_state(call, AFS_CALL_COMPLETE))
+ break;
- if (afs_check_call_state(call, AFS_CALL_COMPLETE))
- break;
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
+ /* rxrpc terminated the call. */
+ rxrpc_complete = true;
+ break;
+ }
- if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
- /* rxrpc terminated the call. */
- rxrpc_complete = true;
- break;
+ schedule();
}
- schedule();
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
}
- remove_wait_queue(&call->waitq, &myself);
- __set_current_state(TASK_RUNNING);
-
if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
if (rxrpc_complete) {
afs_set_call_complete(call, call->error, call->abort_code);
@@ -627,29 +625,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
afs_set_call_complete(call, -EINTR, 0);
}
}
-
- spin_lock_bh(&call->state_lock);
- ac->abort_code = call->abort_code;
- ac->error = call->error;
- spin_unlock_bh(&call->state_lock);
-
- ret = ac->error;
- switch (ret) {
- case 0:
- ret = call->ret0;
- call->ret0 = 0;
-
- fallthrough;
- case -ECONNABORTED:
- ac->responded = true;
- break;
- }
-
-out:
- _debug("call complete");
- afs_put_call(call);
- _leave(" = %p", (void *)ret);
- return ret;
}
/*
diff --git a/fs/afs/server.c b/fs/afs/server.c
index b5237206eac3..e169121f603e 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -21,13 +21,13 @@ static void __afs_put_server(struct afs_net *, struct afs_server *);
/*
* Find a server by one of its addresses.
*/
-struct afs_server *afs_find_server(struct afs_net *net,
- const struct sockaddr_rxrpc *srx)
+struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer *peer)
{
+ const struct afs_endpoint_state *estate;
const struct afs_addr_list *alist;
struct afs_server *server = NULL;
unsigned int i;
- int seq = 0, diff;
+ int seq = 1;
rcu_read_lock();
@@ -35,39 +35,15 @@ struct afs_server *afs_find_server(struct afs_net *net,
if (server)
afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
server = NULL;
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
- if (srx->transport.family == AF_INET6) {
- const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
- hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
- alist = rcu_dereference(server->addresses);
- for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
- b = &alist->addrs[i].transport.sin6;
- diff = ((u16 __force)a->sin6_port -
- (u16 __force)b->sin6_port);
- if (diff == 0)
- diff = memcmp(&a->sin6_addr,
- &b->sin6_addr,
- sizeof(struct in6_addr));
- if (diff == 0)
- goto found;
- }
- }
- } else {
- const struct sockaddr_in *a = &srx->transport.sin, *b;
- hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
- alist = rcu_dereference(server->addresses);
- for (i = 0; i < alist->nr_ipv4; i++) {
- b = &alist->addrs[i].transport.sin;
- diff = ((u16 __force)a->sin_port -
- (u16 __force)b->sin_port);
- if (diff == 0)
- diff = ((u32 __force)a->sin_addr.s_addr -
- (u32 __force)b->sin_addr.s_addr);
- if (diff == 0)
- goto found;
- }
- }
+ hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+ estate = rcu_dereference(server->endpoint_state);
+ alist = estate->addresses;
+ for (i = 0; i < alist->nr_addrs; i++)
+ if (alist->addrs[i].peer == peer)
+ goto found;
}
server = NULL;
@@ -90,7 +66,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
{
struct afs_server *server = NULL;
struct rb_node *p;
- int diff, seq = 0;
+ int diff, seq = 1;
_enter("%pU", uuid);
@@ -102,7 +78,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
if (server)
afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
server = NULL;
-
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_lock, &seq);
p = net->fs_servers.rb_node;
@@ -137,6 +113,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
static struct afs_server *afs_install_server(struct afs_cell *cell,
struct afs_server *candidate)
{
+ const struct afs_endpoint_state *estate;
const struct afs_addr_list *alist;
struct afs_server *server, *next;
struct afs_net *net = cell->net;
@@ -188,8 +165,9 @@ static struct afs_server *afs_install_server(struct afs_cell *cell,
added_dup:
write_seqlock(&net->fs_addr_lock);
- alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&net->fs_addr_lock.lock));
+ estate = rcu_dereference_protected(server->endpoint_state,
+ lockdep_is_held(&net->fs_addr_lock.lock));
+ alist = estate->addresses;
/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
* it in the IPv4 and/or IPv6 reverse-map lists.
@@ -219,6 +197,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
const uuid_t *uuid,
struct afs_addr_list *alist)
{
+ struct afs_endpoint_state *estate;
struct afs_server *server;
struct afs_net *net = cell->net;
@@ -228,25 +207,41 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
if (!server)
goto enomem;
+ estate = kzalloc(sizeof(struct afs_endpoint_state), GFP_KERNEL);
+ if (!estate)
+ goto enomem_server;
+
refcount_set(&server->ref, 1);
atomic_set(&server->active, 1);
server->debug_id = atomic_inc_return(&afs_server_debug_id);
- RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
rwlock_init(&server->fs_lock);
- INIT_WORK(&server->initcb_work, afs_server_init_callback_work);
+ INIT_LIST_HEAD(&server->volumes);
init_waitqueue_head(&server->probe_wq);
INIT_LIST_HEAD(&server->probe_link);
spin_lock_init(&server->probe_lock);
server->cell = cell;
server->rtt = UINT_MAX;
+ server->service_id = FS_SERVICE;
+
+ server->probe_counter = 1;
+ server->probed_at = jiffies - LONG_MAX / 2;
+ refcount_set(&estate->ref, 1);
+ estate->addresses = alist;
+ estate->server_id = server->debug_id;
+ estate->probe_seq = 1;
+ rcu_assign_pointer(server->endpoint_state, estate);
afs_inc_servers_outstanding(net);
trace_afs_server(server->debug_id, 1, 1, afs_server_trace_alloc);
+ trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+ afs_estate_trace_alloc_server);
_leave(" = %p", server);
return server;
+enomem_server:
+ kfree(server);
enomem:
_leave(" = NULL [nomem]");
return NULL;
@@ -301,20 +296,20 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
candidate = afs_alloc_server(cell, uuid, alist);
if (!candidate) {
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_server_oom);
return ERR_PTR(-ENOMEM);
}
server = afs_install_server(cell, candidate);
if (server != candidate) {
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_server_dup);
kfree(candidate);
} else {
/* Immediately dispatch an asynchronous probe to each interface
* on the fileserver. This will make sure the repeat-probing
* service is started.
*/
- afs_fs_probe_fileserver(cell->net, server, key, true);
+ afs_fs_probe_fileserver(cell->net, server, alist, key);
}
return server;
@@ -447,7 +442,8 @@ static void afs_server_rcu(struct rcu_head *rcu)
trace_afs_server(server->debug_id, refcount_read(&server->ref),
atomic_read(&server->active), afs_server_trace_free);
- afs_put_addrlist(rcu_access_pointer(server->addresses));
+ afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state),
+ afs_estate_trace_put_server);
kfree(server);
}
@@ -459,14 +455,10 @@ static void __afs_put_server(struct afs_net *net, struct afs_server *server)
static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server)
{
- struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
- struct afs_addr_cursor ac = {
- .alist = alist,
- .index = alist->preferred,
- .error = 0,
- };
-
- afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ struct afs_endpoint_state *estate = rcu_access_pointer(server->endpoint_state);
+ struct afs_addr_list *alist = estate->addresses;
+
+ afs_fs_give_up_all_callbacks(net, server, &alist->addrs[alist->preferred], NULL);
}
/*
@@ -477,7 +469,6 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
afs_give_up_callbacks(net, server);
- flush_work(&server->initcb_work);
afs_put_server(net, server, afs_server_trace_destroy);
}
@@ -636,9 +627,12 @@ void afs_purge_servers(struct afs_net *net)
* Get an update for a server's address list.
*/
static noinline bool afs_update_server_record(struct afs_operation *op,
- struct afs_server *server)
+ struct afs_server *server,
+ struct key *key)
{
- struct afs_addr_list *alist, *discard;
+ struct afs_endpoint_state *estate;
+ struct afs_addr_list *alist;
+ bool has_addrs;
_enter("");
@@ -648,29 +642,27 @@ static noinline bool afs_update_server_record(struct afs_operation *op,
alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid);
if (IS_ERR(alist)) {
+ rcu_read_lock();
+ estate = rcu_dereference(server->endpoint_state);
+ has_addrs = estate->addresses;
+ rcu_read_unlock();
+
if ((PTR_ERR(alist) == -ERESTARTSYS ||
PTR_ERR(alist) == -EINTR) &&
(op->flags & AFS_OPERATION_UNINTR) &&
- server->addresses) {
+ has_addrs) {
_leave(" = t [intr]");
return true;
}
- op->error = PTR_ERR(alist);
- _leave(" = f [%d]", op->error);
+ afs_op_set_error(op, PTR_ERR(alist));
+ _leave(" = f [%d]", afs_op_error(op));
return false;
}
- discard = alist;
- if (server->addr_version != alist->version) {
- write_lock(&server->fs_lock);
- discard = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->fs_lock));
- rcu_assign_pointer(server->addresses, alist);
- server->addr_version = alist->version;
- write_unlock(&server->fs_lock);
- }
+ if (server->addr_version != alist->version)
+ afs_fs_probe_fileserver(op->net, server, alist, key);
- afs_put_addrlist(discard);
+ afs_put_addrlist(alist, afs_alist_trace_put_server_update);
_leave(" = t");
return true;
}
@@ -678,7 +670,8 @@ static noinline bool afs_update_server_record(struct afs_operation *op,
/*
* See if a server's address list needs updating.
*/
-bool afs_check_server_record(struct afs_operation *op, struct afs_server *server)
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server,
+ struct key *key)
{
bool success;
int ret, retries = 0;
@@ -698,7 +691,7 @@ retry:
update:
if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
- success = afs_update_server_record(op, server);
+ success = afs_update_server_record(op, server, key);
clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
_leave(" = %d", success);
@@ -710,7 +703,7 @@ wait:
(op->flags & AFS_OPERATION_UNINTR) ?
TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
if (ret == -ERESTARTSYS) {
- op->error = ret;
+ afs_op_set_error(op, ret);
_leave(" = f [intr]");
return false;
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index b59896b1de0a..7e7e567a7f8a 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -24,35 +24,62 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
/*
* Build a server list from a VLDB record.
*/
-struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
struct key *key,
- struct afs_vldb_entry *vldb,
- u8 type_mask)
+ struct afs_vldb_entry *vldb)
{
struct afs_server_list *slist;
struct afs_server *server;
- int ret = -ENOMEM, nr_servers = 0, i, j;
-
- for (i = 0; i < vldb->nr_servers; i++)
- if (vldb->fs_mask[i] & type_mask)
- nr_servers++;
+ unsigned int type_mask = 1 << volume->type;
+ bool use_newrepsites = false;
+ int ret = -ENOMEM, nr_servers = 0, newrep = 0, i, j, usable = 0;
+
+ /* Work out if we're going to restrict to NEWREPSITE-marked servers or
+ * not. If at least one site is marked as NEWREPSITE, then it's likely
+ * that "vos release" is busy updating RO sites. We cut over from one
+ * to the other when >=50% of the sites have been updated. Sites that
+ * are in the process of being updated are marked DONTUSE.
+ */
+ for (i = 0; i < vldb->nr_servers; i++) {
+ if (!(vldb->fs_mask[i] & type_mask))
+ continue;
+ nr_servers++;
+ if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
+ continue;
+ usable++;
+ if (vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE)
+ newrep++;
+ }
slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
if (!slist)
goto error;
+ if (newrep) {
+ if (newrep < usable / 2) {
+ slist->ro_replicating = AFS_RO_REPLICATING_USE_OLD;
+ } else {
+ slist->ro_replicating = AFS_RO_REPLICATING_USE_NEW;
+ use_newrepsites = true;
+ }
+ }
+
refcount_set(&slist->usage, 1);
rwlock_init(&slist->lock);
- for (i = 0; i < AFS_MAXTYPES; i++)
- slist->vids[i] = vldb->vid[i];
-
/* Make sure a records exists for each server in the list. */
for (i = 0; i < vldb->nr_servers; i++) {
+ unsigned long se_flags = 0;
+ bool newrepsite = vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE;
+
if (!(vldb->fs_mask[i] & type_mask))
continue;
+ if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
+ __set_bit(AFS_SE_EXCLUDED, &se_flags);
+ if (newrep && (newrepsite ^ use_newrepsites))
+ __set_bit(AFS_SE_EXCLUDED, &se_flags);
- server = afs_lookup_server(cell, key, &vldb->fs_server[i],
+ server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i],
vldb->addr_version[i]);
if (IS_ERR(server)) {
ret = PTR_ERR(server);
@@ -70,7 +97,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
- afs_put_server(cell->net, server,
+ afs_put_server(volume->cell->net, server,
afs_server_trace_put_slist_isort);
continue;
}
@@ -81,6 +108,9 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
}
slist->servers[j].server = server;
+ slist->servers[j].volume = volume;
+ slist->servers[j].flags = se_flags;
+ slist->servers[j].cb_expires_at = AFS_NO_CB_PROMISE;
slist->nr_servers++;
}
@@ -92,7 +122,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
return slist;
error_2:
- afs_put_serverlist(cell->net, slist);
+ afs_put_serverlist(volume->cell->net, slist);
error:
return ERR_PTR(ret);
}
@@ -103,27 +133,117 @@ error:
bool afs_annotate_server_list(struct afs_server_list *new,
struct afs_server_list *old)
{
- struct afs_server *cur;
- int i, j;
+ unsigned long mask = 1UL << AFS_SE_EXCLUDED;
+ int i;
- if (old->nr_servers != new->nr_servers)
+ if (old->nr_servers != new->nr_servers ||
+ old->ro_replicating != new->ro_replicating)
goto changed;
- for (i = 0; i < old->nr_servers; i++)
+ for (i = 0; i < old->nr_servers; i++) {
if (old->servers[i].server != new->servers[i].server)
goto changed;
-
+ if ((old->servers[i].flags & mask) != (new->servers[i].flags & mask))
+ goto changed;
+ }
return false;
-
changed:
- /* Maintain the same preferred server as before if possible. */
- cur = old->servers[old->preferred].server;
- for (j = 0; j < new->nr_servers; j++) {
- if (new->servers[j].server == cur) {
- new->preferred = j;
- break;
+ return true;
+}
+
+/*
+ * Attach a volume to the servers it is going to use.
+ */
+void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist)
+{
+ struct afs_server_entry *se, *pe;
+ struct afs_server *server;
+ struct list_head *p;
+ unsigned int i;
+
+ down_write(&volume->cell->vs_lock);
+
+ for (i = 0; i < slist->nr_servers; i++) {
+ se = &slist->servers[i];
+ server = se->server;
+
+ list_for_each(p, &server->volumes) {
+ pe = list_entry(p, struct afs_server_entry, slink);
+ if (volume->vid <= pe->volume->vid)
+ break;
}
+ list_add_tail(&se->slink, p);
}
- return true;
+ slist->attached = true;
+ up_write(&volume->cell->vs_lock);
+}
+
+/*
+ * Reattach a volume to the servers it is going to use when the server list is
+ * replaced. We try to switch the attachment points to avoid rewalking the
+ * lists.
+ */
+void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new,
+ struct afs_server_list *old)
+{
+ unsigned int n = 0, o = 0;
+
+ down_write(&volume->cell->vs_lock);
+
+ while (n < new->nr_servers || o < old->nr_servers) {
+ struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL;
+ struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL;
+ struct afs_server_entry *s;
+ struct list_head *p;
+ int diff;
+
+ if (pn && po && pn->server == po->server) {
+ pn->cb_expires_at = po->cb_expires_at;
+ list_replace(&po->slink, &pn->slink);
+ n++;
+ o++;
+ continue;
+ }
+
+ if (pn && po)
+ diff = memcmp(&pn->server->uuid, &po->server->uuid,
+ sizeof(pn->server->uuid));
+ else
+ diff = pn ? -1 : 1;
+
+ if (diff < 0) {
+ list_for_each(p, &pn->server->volumes) {
+ s = list_entry(p, struct afs_server_entry, slink);
+ if (volume->vid <= s->volume->vid)
+ break;
+ }
+ list_add_tail(&pn->slink, p);
+ n++;
+ } else {
+ list_del(&po->slink);
+ o++;
+ }
+ }
+
+ up_write(&volume->cell->vs_lock);
+}
+
+/*
+ * Detach a volume from the servers it has been using.
+ */
+void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist)
+{
+ unsigned int i;
+
+ if (!slist->attached)
+ return;
+
+ down_write(&volume->cell->vs_lock);
+
+ for (i = 0; i < slist->nr_servers; i++)
+ list_del(&slist->servers[i].slink);
+
+ slist->attached = false;
+ up_write(&volume->cell->vs_lock);
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index a01a0fb2cdbb..ae2d66a52add 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -381,8 +381,7 @@ static int afs_validate_fc(struct fs_context *fc)
ctx->key = key;
if (ctx->volume) {
- afs_put_volume(ctx->net, ctx->volume,
- afs_volume_trace_put_validate_fc);
+ afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc);
ctx->volume = NULL;
}
@@ -529,7 +528,7 @@ static void afs_destroy_sbi(struct afs_super_info *as)
{
if (as) {
struct afs_net *net = afs_net(as->net_ns);
- afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi);
+ afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi);
afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi);
put_net(as->net_ns);
kfree(as);
@@ -615,7 +614,7 @@ static void afs_free_fc(struct fs_context *fc)
struct afs_fs_context *ctx = fc->fs_private;
afs_destroy_sbi(fc->s_fs_info);
- afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc);
+ afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc);
afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
key_put(ctx->key);
kfree(ctx);
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
new file mode 100644
index 000000000000..46b37f2cce7d
--- /dev/null
+++ b/fs/afs/validation.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* vnode and volume validity verification.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+/*
+ * Data validation is managed through a number of mechanisms from the server:
+ *
+ * (1) On first contact with a server (such as if it has just been rebooted),
+ * the server sends us a CB.InitCallBackState* request.
+ *
+ * (2) On a RW volume, in response to certain vnode (inode)-accessing RPC
+ * calls, the server maintains a time-limited per-vnode promise that it
+ * will send us a CB.CallBack request if a third party alters the vnodes
+ * accessed.
+ *
+ * Note that vnode-level callbacks may also be sent for other reasons,
+ * such as filelock release.
+ *
+ * (3) On a RO (or Backup) volume, in response to certain vnode-accessing RPC
+ * calls, each server maintains a time-limited per-volume promise that it
+ * will send us a CB.CallBack request if the RO volume is updated to a
+ * snapshot of the RW volume ("vos release"). This is an atomic event
+ * that cuts over all instances of the RO volume across multiple servers
+ * simultaneously.
+ *
+ * Note that volume-level callbacks may also be sent for other reasons,
+ * such as the volumeserver taking over control of the volume from the
+ * fileserver.
+ *
+ * Note also that each server maintains an independent time limit on an
+ * independent callback.
+ *
+ * (4) Certain RPC calls include a volume information record "VolSync" in
+ * their reply. This contains a creation date for the volume that should
+ * remain unchanged for a RW volume (but will be changed if the volume is
+ * restored from backup) or will be bumped to the time of snapshotting
+ * when a RO volume is released.
+ *
+ * In order to track these events, the following are provided:
+ *
+ * ->cb_v_break. A counter of events that might mean that the contents of
+ * a volume have been altered since we last checked a vnode.
+ *
+ * ->cb_v_check. A counter of the number of events that we've sent a
+ * query to the server for. Everything's up to date if this equals
+ * cb_v_break.
+ *
+ * ->cb_scrub. A counter of the number of regression events for which we
+ * have to completely wipe the cache.
+ *
+ * ->cb_ro_snapshot. A counter of the number of times that we've
+ * recognised that a RO volume has been updated.
+ *
+ * ->cb_break. A counter of events that might mean that the contents of a
+ * vnode have been altered.
+ *
+ * ->cb_expires_at. The time at which the callback promise expires or
+ * AFS_NO_CB_PROMISE if we have no promise.
+ *
+ * The way we manage things is:
+ *
+ * (1) When a volume-level CB.CallBack occurs, we increment ->cb_v_break on
+ * the volume and reset ->cb_expires_at (ie. set AFS_NO_CB_PROMISE) on the
+ * volume and volume's server record.
+ *
+ * (2) When a CB.InitCallBackState occurs, we treat this as a volume-level
+ * callback break on all the volumes that have been using that server
+ * (ie. increment ->cb_v_break and reset ->cb_expires_at).
+ *
+ * (3) When a vnode-level CB.CallBack occurs, we increment ->cb_break on the
+ * vnode and reset its ->cb_expires_at. If the vnode is mmapped, we also
+ * dispatch a work item to unmap all PTEs to the vnode's pagecache to
+ * force reentry to the filesystem for revalidation.
+ *
+ * (4) When entering the filesystem, we call afs_validate() to check the
+ * validity of a vnode. This first checks to see if ->cb_v_check and
+ * ->cb_v_break match, and if they don't, we lock volume->cb_check_lock
+ * exclusively and perform an FS.FetchStatus on the vnode.
+ *
+ * After checking the volume, we check the vnode. If there's a mismatch
+ * between the volume counters and the vnode's mirrors of those counters,
+ * we lock vnode->validate_lock and issue an FS.FetchStatus on the vnode.
+ *
+ * (5) When the reply from FS.FetchStatus arrives, the VolSync record is
+ * parsed:
+ *
+ * (A) If the Creation timestamp has changed on a RW volume or regressed
+ * on a RO volume, we try to increment ->cb_scrub; if it advances on a
+ * RO volume, we assume "vos release" happened and try to increment
+ * ->cb_ro_snapshot.
+ *
+ * (B) If the Update timestamp has regressed, we try to increment
+ * ->cb_scrub.
+ *
+ * Note that in both of these cases, we only do the increment if we can
+ * cmpxchg the value of the timestamp from the value we noted before the
+ * op. This tries to prevent parallel ops from fighting one another.
+ *
+ * volume->cb_v_check is then set to ->cb_v_break.
+ *
+ * (6) The AFSCallBack record included in the FS.FetchStatus reply is also
+ * parsed and used to set the promise in ->cb_expires_at for the vnode,
+ * the volume and the volume's server record.
+ *
+ * (7) If ->cb_scrub is seen to have advanced, we invalidate the pagecache for
+ * the vnode.
+ */
+
+/*
+ * Check the validity of a vnode/inode and its parent volume.
+ */
+bool afs_check_validity(const struct afs_vnode *vnode)
+{
+ const struct afs_volume *volume = vnode->volume;
+ time64_t deadline = ktime_get_real_seconds() + 10;
+
+ if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
+ atomic64_read(&vnode->cb_expires_at) <= deadline ||
+ volume->cb_expires_at <= deadline ||
+ vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot) ||
+ vnode->cb_scrub != atomic_read(&volume->cb_scrub) ||
+ test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+ _debug("inval");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * See if the server we've just talked to is currently excluded.
+ */
+static bool __afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
+{
+ const struct afs_server_entry *se;
+ const struct afs_server_list *slist;
+ bool is_excluded = true;
+ int i;
+
+ rcu_read_lock();
+
+ slist = rcu_dereference(volume->servers);
+ for (i = 0; i < slist->nr_servers; i++) {
+ se = &slist->servers[i];
+ if (op->server == se->server) {
+ is_excluded = test_bit(AFS_SE_EXCLUDED, &se->flags);
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ return is_excluded;
+}
+
+/*
+ * Update the volume's server list when the creation time changes and see if
+ * the server we've just talked to is currently excluded.
+ */
+static int afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
+{
+ int ret;
+
+ if (__afs_is_server_excluded(op, volume))
+ return 1;
+
+ set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+ ret = afs_check_volume_status(op->volume, op);
+ if (ret < 0)
+ return ret;
+
+ return __afs_is_server_excluded(op, volume);
+}
+
+/*
+ * Handle a change to the volume creation time in the VolSync record.
+ */
+static int afs_update_volume_creation_time(struct afs_operation *op, struct afs_volume *volume)
+{
+ unsigned int snap;
+ time64_t cur = volume->creation_time;
+ time64_t old = op->pre_volsync.creation;
+ time64_t new = op->volsync.creation;
+ int ret;
+
+ _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new);
+
+ if (cur == TIME64_MIN) {
+ volume->creation_time = new;
+ return 0;
+ }
+
+ if (new == cur)
+ return 0;
+
+ /* Try to advance the creation timestamp from what we had before the
+ * operation to what we got back from the server. This should
+ * hopefully ensure that in a race between multiple operations only one
+ * of them will do this.
+ */
+ if (cur != old)
+ return 0;
+
+ /* If the creation time changes in an unexpected way, we need to scrub
+ * our caches. For a RW vol, this will only change if the volume is
+ * restored from a backup; for a RO/Backup vol, this will advance when
+ * the volume is updated to a new snapshot (eg. "vos release").
+ */
+ if (volume->type == AFSVL_RWVOL)
+ goto regressed;
+ if (volume->type == AFSVL_BACKVOL) {
+ if (new < old)
+ goto regressed;
+ goto advance;
+ }
+
+ /* We have an RO volume, we need to query the VL server and look at the
+ * server flags to see if RW->RO replication is in progress.
+ */
+ ret = afs_is_server_excluded(op, volume);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ snap = atomic_read(&volume->cb_ro_snapshot);
+ trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_volume_excluded);
+ return ret;
+ }
+
+advance:
+ snap = atomic_inc_return(&volume->cb_ro_snapshot);
+ trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_for_vos_release);
+ volume->creation_time = new;
+ return 0;
+
+regressed:
+ atomic_inc(&volume->cb_scrub);
+ trace_afs_cb_v_break(volume->vid, 0, afs_cb_break_for_creation_regress);
+ volume->creation_time = new;
+ return 0;
+}
+
+/*
+ * Handle a change to the volume update time in the VolSync record.
+ */
+static void afs_update_volume_update_time(struct afs_operation *op, struct afs_volume *volume)
+{
+ enum afs_cb_break_reason reason = afs_cb_break_no_break;
+ time64_t cur = volume->update_time;
+ time64_t old = op->pre_volsync.update;
+ time64_t new = op->volsync.update;
+
+ _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new);
+
+ if (cur == TIME64_MIN) {
+ volume->update_time = new;
+ return;
+ }
+
+ if (new == cur)
+ return;
+
+ /* If the volume update time changes in an unexpected way, we need to
+ * scrub our caches. For a RW vol, this will advance on every
+ * modification op; for a RO/Backup vol, this will advance when the
+ * volume is updated to a new snapshot (eg. "vos release").
+ */
+ if (new < old)
+ reason = afs_cb_break_for_update_regress;
+
+ /* Try to advance the update timestamp from what we had before the
+ * operation to what we got back from the server. This should
+ * hopefully ensure that in a race between multiple operations only one
+ * of them will do this.
+ */
+ if (cur == old) {
+ if (reason == afs_cb_break_for_update_regress) {
+ atomic_inc(&volume->cb_scrub);
+ trace_afs_cb_v_break(volume->vid, 0, reason);
+ }
+ volume->update_time = new;
+ }
+}
+
+static int afs_update_volume_times(struct afs_operation *op, struct afs_volume *volume)
+{
+ int ret = 0;
+
+ if (likely(op->volsync.creation == volume->creation_time &&
+ op->volsync.update == volume->update_time))
+ return 0;
+
+ mutex_lock(&volume->volsync_lock);
+ if (op->volsync.creation != volume->creation_time) {
+ ret = afs_update_volume_creation_time(op, volume);
+ if (ret < 0)
+ goto out;
+ }
+ if (op->volsync.update != volume->update_time)
+ afs_update_volume_update_time(op, volume);
+out:
+ mutex_unlock(&volume->volsync_lock);
+ return ret;
+}
+
+/*
+ * Update the state of a volume, including recording the expiration time of the
+ * callback promise. Returns 1 to redo the operation from the start.
+ */
+int afs_update_volume_state(struct afs_operation *op)
+{
+ struct afs_server_list *slist = op->server_list;
+ struct afs_server_entry *se = &slist->servers[op->server_index];
+ struct afs_callback *cb = &op->file[0].scb.callback;
+ struct afs_volume *volume = op->volume;
+ unsigned int cb_v_break = atomic_read(&volume->cb_v_break);
+ unsigned int cb_v_check = atomic_read(&volume->cb_v_check);
+ int ret;
+
+ _enter("%llx", op->volume->vid);
+
+ if (op->volsync.creation != TIME64_MIN || op->volsync.update != TIME64_MIN) {
+ ret = afs_update_volume_times(op, volume);
+ if (ret != 0) {
+ _leave(" = %d", ret);
+ return ret;
+ }
+ }
+
+ if (op->cb_v_break == cb_v_break &&
+ (op->file[0].scb.have_cb || op->file[1].scb.have_cb)) {
+ time64_t expires_at = cb->expires_at;
+
+ if (!op->file[0].scb.have_cb)
+ expires_at = op->file[1].scb.callback.expires_at;
+
+ se->cb_expires_at = expires_at;
+ volume->cb_expires_at = expires_at;
+ }
+ if (cb_v_check < op->cb_v_break)
+ atomic_cmpxchg(&volume->cb_v_check, cb_v_check, op->cb_v_break);
+ return 0;
+}
+
+/*
+ * mark the data attached to an inode as obsolete due to a write on the server
+ * - might also want to ditch all the outstanding writes and dirty pages
+ */
+static void afs_zap_data(struct afs_vnode *vnode)
+{
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+ afs_invalidate_cache(vnode, 0);
+
+ /* nuke all the non-dirty pages that aren't locked, mapped or being
+ * written back in a regular file and completely discard the pages in a
+ * directory or symlink */
+ if (S_ISREG(vnode->netfs.inode.i_mode))
+ invalidate_remote_inode(&vnode->netfs.inode);
+ else
+ invalidate_inode_pages2(vnode->netfs.inode.i_mapping);
+}
+
+/*
+ * validate a vnode/inode
+ * - there are several things we need to check
+ * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ * symlink)
+ * - parent dir metadata changed (security changes)
+ * - dentry data changed (write, truncate)
+ * - dentry metadata changed (security changes)
+ */
+int afs_validate(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_volume *volume = vnode->volume;
+ unsigned int cb_ro_snapshot, cb_scrub;
+ time64_t deadline = ktime_get_real_seconds() + 10;
+ bool zap = false, locked_vol = false;
+ int ret;
+
+ _enter("{v={%llx:%llu} fl=%lx},%x",
+ vnode->fid.vid, vnode->fid.vnode, vnode->flags,
+ key_serial(key));
+
+ if (afs_check_validity(vnode))
+ return 0;
+
+ ret = down_write_killable(&vnode->validate_lock);
+ if (ret < 0)
+ goto error;
+
+ /* Validate a volume after the v_break has changed or the volume
+ * callback expired. We only want to do this once per volume per
+ * v_break change. The actual work will be done when parsing the
+ * status fetch reply.
+ */
+ if (volume->cb_expires_at <= deadline ||
+ atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break)) {
+ ret = mutex_lock_interruptible(&volume->cb_check_lock);
+ if (ret < 0)
+ goto error_unlock;
+ locked_vol = true;
+ }
+
+ cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot);
+ cb_scrub = atomic_read(&volume->cb_scrub);
+ if (vnode->cb_ro_snapshot != cb_ro_snapshot ||
+ vnode->cb_scrub != cb_scrub)
+ unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
+
+ if (vnode->cb_ro_snapshot != cb_ro_snapshot ||
+ vnode->cb_scrub != cb_scrub ||
+ volume->cb_expires_at <= deadline ||
+ atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
+ atomic64_read(&vnode->cb_expires_at) <= deadline
+ ) {
+ ret = afs_fetch_status(vnode, key, false, NULL);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ ret = -ESTALE;
+ }
+ goto error_unlock;
+ }
+
+ _debug("new promise [fl=%lx]", vnode->flags);
+ }
+
+ /* We can drop the volume lock now that any status fetch is done. */
+ if (locked_vol) {
+ mutex_unlock(&volume->cb_check_lock);
+ locked_vol = false;
+ }
+
+ cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot);
+ cb_scrub = atomic_read(&volume->cb_scrub);
+ _debug("vnode inval %x==%x %x==%x",
+ vnode->cb_ro_snapshot, cb_ro_snapshot,
+ vnode->cb_scrub, cb_scrub);
+ if (vnode->cb_scrub != cb_scrub)
+ zap = true;
+ vnode->cb_ro_snapshot = cb_ro_snapshot;
+ vnode->cb_scrub = cb_scrub;
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ _debug("file already deleted");
+ ret = -ESTALE;
+ goto error_unlock;
+ }
+
+ /* if the vnode's data version number changed then its contents are
+ * different */
+ zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+ if (zap)
+ afs_zap_data(vnode);
+ up_write(&vnode->validate_lock);
+ _leave(" = 0");
+ return 0;
+
+error_unlock:
+ if (locked_vol)
+ mutex_unlock(&volume->cb_check_lock);
+ up_write(&vnode->validate_lock);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index f04a80e4f5c3..9f36e14f1c2d 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -33,55 +33,6 @@ static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *k
}
/*
- * Compare two addresses.
- */
-static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a,
- const struct sockaddr_rxrpc *srx_b)
-{
- short port_a, port_b;
- int addr_a, addr_b, diff;
-
- diff = (short)srx_a->transport_type - (short)srx_b->transport_type;
- if (diff)
- goto out;
-
- switch (srx_a->transport_type) {
- case AF_INET: {
- const struct sockaddr_in *a = &srx_a->transport.sin;
- const struct sockaddr_in *b = &srx_b->transport.sin;
- addr_a = ntohl(a->sin_addr.s_addr);
- addr_b = ntohl(b->sin_addr.s_addr);
- diff = addr_a - addr_b;
- if (diff == 0) {
- port_a = ntohs(a->sin_port);
- port_b = ntohs(b->sin_port);
- diff = port_a - port_b;
- }
- break;
- }
-
- case AF_INET6: {
- const struct sockaddr_in6 *a = &srx_a->transport.sin6;
- const struct sockaddr_in6 *b = &srx_b->transport.sin6;
- diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16);
- if (diff == 0) {
- port_a = ntohs(a->sin6_port);
- port_b = ntohs(b->sin6_port);
- diff = port_a - port_b;
- }
- break;
- }
-
- default:
- WARN_ON(1);
- diff = 1;
- }
-
-out:
- return diff;
-}
-
-/*
* Compare the address lists of a pair of fileservers.
*/
static int afs_compare_fs_alists(const struct afs_server *server_a,
@@ -90,13 +41,13 @@ static int afs_compare_fs_alists(const struct afs_server *server_a,
const struct afs_addr_list *la, *lb;
int a = 0, b = 0, addr_matches = 0;
- la = rcu_dereference(server_a->addresses);
- lb = rcu_dereference(server_b->addresses);
+ la = rcu_dereference(server_a->endpoint_state)->addresses;
+ lb = rcu_dereference(server_b->endpoint_state)->addresses;
while (a < la->nr_addrs && b < lb->nr_addrs) {
- const struct sockaddr_rxrpc *srx_a = &la->addrs[a];
- const struct sockaddr_rxrpc *srx_b = &lb->addrs[b];
- int diff = afs_compare_addrs(srx_a, srx_b);
+ unsigned long pa = (unsigned long)la->addrs[a].peer;
+ unsigned long pb = (unsigned long)lb->addrs[b].peer;
+ long diff = pa - pb;
if (diff < 0) {
a++;
@@ -126,7 +77,7 @@ static int afs_compare_volume_slists(const struct afs_volume *vol_a,
lb = rcu_dereference(vol_b->servers);
for (i = 0; i < AFS_MAXTYPES; i++)
- if (la->vids[i] != lb->vids[i])
+ if (vol_a->vids[i] != vol_b->vids[i])
return 0;
while (a < la->nr_servers && b < lb->nr_servers) {
@@ -205,7 +156,7 @@ static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
/* And see if it's in the new cell. */
volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len);
if (IS_ERR(volume)) {
- afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias);
+ afs_put_volume(pvol, afs_volume_trace_put_query_alias);
if (PTR_ERR(volume) != -ENOMEDIUM)
return PTR_ERR(volume);
/* That volume is not in the new cell, so not an alias */
@@ -223,8 +174,8 @@ static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
rcu_read_unlock();
}
- afs_put_volume(cell->net, volume, afs_volume_trace_put_query_alias);
- afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias);
+ afs_put_volume(volume, afs_volume_trace_put_query_alias);
+ afs_put_volume(pvol, afs_volume_trace_put_query_alias);
return ret;
}
@@ -285,7 +236,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
while (afs_select_vlserver(&vc)) {
if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) {
- vc.ac.error = -EOPNOTSUPP;
+ vc.call_error = -EOPNOTSUPP;
skipped = true;
continue;
}
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index acc48216136a..9b1c20daac53 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -13,6 +13,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
unsigned short port)
{
struct afs_vlserver *vlserver;
+ static atomic_t debug_ids;
vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
GFP_KERNEL);
@@ -21,8 +22,10 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
rwlock_init(&vlserver->lock);
init_waitqueue_head(&vlserver->probe_wq);
spin_lock_init(&vlserver->probe_lock);
+ vlserver->debug_id = atomic_inc_return(&debug_ids);
vlserver->rtt = UINT_MAX;
vlserver->name_len = name_len;
+ vlserver->service_id = VL_SERVICE;
vlserver->port = port;
memcpy(vlserver->name, name, name_len);
}
@@ -33,7 +36,8 @@ static void afs_vlserver_rcu(struct rcu_head *rcu)
{
struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
- afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
+ afs_put_addrlist(rcu_access_pointer(vlserver->addresses),
+ afs_alist_trace_put_vlserver);
kfree_rcu(vlserver, rcu);
}
@@ -83,14 +87,15 @@ static u16 afs_extract_le16(const u8 **_b)
/*
* Build a VL server address list from a DNS queried server list.
*/
-static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+static struct afs_addr_list *afs_extract_vl_addrs(struct afs_net *net,
+ const u8 **_b, const u8 *end,
u8 nr_addrs, u16 port)
{
struct afs_addr_list *alist;
const u8 *b = *_b;
int ret = -EINVAL;
- alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+ alist = afs_alloc_addrlist(nr_addrs);
if (!alist)
return ERR_PTR(-ENOMEM);
if (nr_addrs == 0)
@@ -109,7 +114,9 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
goto error;
}
memcpy(x, b, 4);
- afs_merge_fs_addr4(alist, x[0], port);
+ ret = afs_merge_fs_addr4(net, alist, x[0], port);
+ if (ret < 0)
+ goto error;
b += 4;
break;
@@ -119,7 +126,9 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
goto error;
}
memcpy(x, b, 16);
- afs_merge_fs_addr6(alist, x, port);
+ ret = afs_merge_fs_addr6(net, alist, x, port);
+ if (ret < 0)
+ goto error;
b += 16;
break;
@@ -140,7 +149,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
error:
*_b = b;
- afs_put_addrlist(alist);
+ afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
return ERR_PTR(ret);
}
@@ -247,7 +256,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
/* Extract the addresses - note that we can't skip this as we
* have to advance the payload pointer.
*/
- addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
+ addrs = afs_extract_vl_addrs(cell->net, &b, end, bs.nr_addrs, bs.port);
if (IS_ERR(addrs)) {
ret = PTR_ERR(addrs);
goto error_2;
@@ -255,7 +264,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
if (vllist->nr_servers >= nr_servers) {
_debug("skip %u >= %u", vllist->nr_servers, nr_servers);
- afs_put_addrlist(addrs);
+ afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty);
afs_put_vlserver(cell->net, server);
continue;
}
@@ -264,7 +273,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
addrs->status = bs.status;
if (addrs->nr_addrs == 0) {
- afs_put_addrlist(addrs);
+ afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty);
if (!rcu_access_pointer(server->addresses)) {
afs_put_vlserver(cell->net, server);
continue;
@@ -276,7 +285,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
old = rcu_replace_pointer(server->addresses, old,
lockdep_is_held(&server->lock));
write_unlock(&server->lock);
- afs_put_addrlist(old);
+ afs_put_addrlist(old, afs_alist_trace_put_vlserver_old);
}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index 58452b86e672..3d2e0c925460 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -46,11 +46,12 @@ static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up)
*/
void afs_vlserver_probe_result(struct afs_call *call)
{
- struct afs_addr_list *alist = call->alist;
+ struct afs_addr_list *alist = call->vl_probe;
struct afs_vlserver *server = call->vlserver;
+ struct afs_address *addr = &alist->addrs[call->probe_index];
unsigned int server_index = call->server_index;
unsigned int rtt_us = 0;
- unsigned int index = call->addr_ix;
+ unsigned int index = call->probe_index;
bool have_result = false;
int ret = call->error;
@@ -89,7 +90,7 @@ void afs_vlserver_probe_result(struct afs_call *call)
case -ETIME:
default:
clear_bit(index, &alist->responded);
- set_bit(index, &alist->failed);
+ set_bit(index, &alist->probe_failed);
if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) &&
(server->probe.error == 0 ||
server->probe.error == -ETIMEDOUT ||
@@ -101,21 +102,21 @@ void afs_vlserver_probe_result(struct afs_call *call)
responded:
set_bit(index, &alist->responded);
- clear_bit(index, &alist->failed);
+ clear_bit(index, &alist->probe_failed);
if (call->service_id == YFS_VL_SERVICE) {
server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS;
set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
} else {
server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS;
if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) {
clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
- alist->addrs[index].srx_service = call->service_id;
+ server->service_id = call->service_id;
}
}
- rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ rtt_us = rxrpc_kernel_get_srtt(addr->peer);
if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
@@ -130,8 +131,10 @@ responded:
out:
spin_unlock(&server->probe_lock);
- _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
- server_index, index, &alist->addrs[index].transport, rtt_us, ret);
+ trace_afs_vl_probe(server, false, alist, index, call->error, call->abort_code, rtt_us);
+ _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+ server_index, index, rxrpc_kernel_remote_addr(addr->peer),
+ rtt_us, ret);
afs_done_one_vl_probe(server, have_result);
}
@@ -146,35 +149,52 @@ static bool afs_do_probe_vlserver(struct afs_net *net,
unsigned int server_index,
struct afs_error *_e)
{
- struct afs_addr_cursor ac = {
- .index = 0,
- };
+ struct afs_addr_list *alist;
struct afs_call *call;
+ unsigned long unprobed;
+ unsigned int index, i;
bool in_progress = false;
+ int best_prio;
_enter("%s", server->name);
read_lock(&server->lock);
- ac.alist = rcu_dereference_protected(server->addresses,
- lockdep_is_held(&server->lock));
+ alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->lock));
+ afs_get_addrlist(alist, afs_alist_trace_get_vlprobe);
read_unlock(&server->lock);
- atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+ atomic_set(&server->probe_outstanding, alist->nr_addrs);
memset(&server->probe, 0, sizeof(server->probe));
server->probe.rtt = UINT_MAX;
- for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
- call = afs_vl_get_capabilities(net, &ac, key, server,
+ /* Avoid an undefined full-width shift if the list is completely full. */
+ unprobed = alist->nr_addrs < BITS_PER_LONG ? (1UL << alist->nr_addrs) - 1 : ~0UL;
+ while (unprobed) {
+ best_prio = -1;
+ index = 0;
+ for (i = 0; i < alist->nr_addrs; i++) {
+ if (test_bit(i, &unprobed) && alist->addrs[i].prio > best_prio) {
+ index = i;
+ best_prio = alist->addrs[i].prio;
+ }
+ }
+ __clear_bit(index, &unprobed);
+
+ trace_afs_vl_probe(server, true, alist, index, 0, 0, 0);
+ call = afs_vl_get_capabilities(net, alist, index, key, server,
server_index);
if (!IS_ERR(call)) {
+ afs_prioritise_error(_e, call->error, call->abort_code);
afs_put_call(call);
in_progress = true;
} else {
- afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
+ afs_prioritise_error(_e, PTR_ERR(call), 0);
afs_done_one_vl_probe(server, false);
}
}
+ afs_put_addrlist(alist, afs_alist_trace_put_vlprobe);
return in_progress;
}
@@ -185,12 +205,10 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
struct afs_vlserver_list *vllist)
{
struct afs_vlserver *server;
- struct afs_error e;
+ struct afs_error e = {};
bool in_progress = false;
int i;
- e.error = 0;
- e.responded = false;
for (i = 0; i < vllist->nr_servers; i++) {
server = vllist->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index eb415ce56360..d8f79f6ada3d 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -17,18 +17,21 @@
bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
struct key *key)
{
+ static atomic_t debug_ids;
+
memset(vc, 0, sizeof(*vc));
vc->cell = cell;
vc->key = key;
- vc->error = -EDESTADDRREQ;
- vc->ac.error = SHRT_MAX;
+ vc->cumul_error.error = -EDESTADDRREQ;
+ vc->nr_iterations = -1;
if (signal_pending(current)) {
- vc->error = -EINTR;
+ vc->cumul_error.error = -EINTR;
vc->flags |= AFS_VL_CURSOR_STOP;
return false;
}
+ vc->debug_id = atomic_inc_return(&debug_ids);
return true;
}
@@ -52,7 +55,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
&cell->dns_lookup_count,
smp_load_acquire(&cell->dns_lookup_count)
!= dns_lookup_count) < 0) {
- vc->error = -ERESTARTSYS;
+ vc->cumul_error.error = -ERESTARTSYS;
return false;
}
}
@@ -60,12 +63,12 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
/* Status load is ordered after lookup counter load */
if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
pr_warn("No record of cell %s\n", cell->name);
- vc->error = -ENOENT;
+ vc->cumul_error.error = -ENOENT;
return false;
}
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
- vc->error = -EDESTADDRREQ;
+ vc->cumul_error.error = -EDESTADDRREQ;
return false;
}
}
@@ -78,8 +81,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
if (!vc->server_list->nr_servers)
return false;
- vc->untried = (1UL << vc->server_list->nr_servers) - 1;
- vc->index = -1;
+ vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
+ vc->server_index = -1;
return true;
}
@@ -89,54 +92,57 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
*/
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
- struct afs_addr_list *alist;
+ struct afs_addr_list *alist = vc->alist;
struct afs_vlserver *vlserver;
- struct afs_error e;
- u32 rtt;
- int error = vc->ac.error, i;
+ unsigned long set, failed;
+ unsigned int rtt;
+ s32 abort_code = vc->call_abort_code;
+ int error = vc->call_error, i;
+
+ vc->nr_iterations++;
- _enter("%lx[%d],%lx[%d],%d,%d",
- vc->untried, vc->index,
- vc->ac.tried, vc->ac.index,
- error, vc->ac.abort_code);
+ _enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
+ vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
+ vc->addr_index, vc->addr_tried,
+ error, abort_code);
if (vc->flags & AFS_VL_CURSOR_STOP) {
_leave(" = f [stopped]");
return false;
}
- vc->nr_iterations++;
+ if (vc->nr_iterations == 0)
+ goto start;
+
+ WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);
/* Evaluate the result of the previous operation, if there was one. */
switch (error) {
- case SHRT_MAX:
- goto start;
-
default:
case 0:
/* Success or local failure. Stop. */
- vc->error = error;
+ vc->cumul_error.error = error;
vc->flags |= AFS_VL_CURSOR_STOP;
- _leave(" = f [okay/local %d]", vc->ac.error);
+ _leave(" = f [okay/local %d]", vc->cumul_error.error);
return false;
case -ECONNABORTED:
/* The far side rejected the operation on some grounds. This
* might involve the server being busy or the volume having been moved.
*/
- switch (vc->ac.abort_code) {
+ switch (abort_code) {
case AFSVL_IO:
case AFSVL_BADVOLOPER:
case AFSVL_NOMEM:
/* The server went weird. */
- vc->error = -EREMOTEIO;
+ afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
//write_lock(&vc->cell->vl_servers_lock);
- //vc->server_list->weird_mask |= 1 << vc->index;
+ //vc->server_list->weird_mask |= 1 << vc->server_index;
//write_unlock(&vc->cell->vl_servers_lock);
goto next_server;
default:
- vc->error = afs_abort_to_error(vc->ac.abort_code);
+ afs_prioritise_error(&vc->cumul_error, error, abort_code);
goto failed;
}
@@ -149,12 +155,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
case -ETIMEDOUT:
case -ETIME:
_debug("no conn %d", error);
- vc->error = error;
+ afs_prioritise_error(&vc->cumul_error, error, 0);
goto iterate_address;
case -ECONNRESET:
_debug("call reset");
- vc->error = error;
+ afs_prioritise_error(&vc->cumul_error, error, 0);
vc->flags |= AFS_VL_CURSOR_RETRY;
goto next_server;
@@ -165,7 +171,13 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
restart_from_beginning:
_debug("restart");
- afs_end_cursor(&vc->ac);
+ /* alist has already been dropped (NULL) when we come from no_more_servers. */
+ if (alist && vc->call_responded && vc->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(alist->preferred, vc->addr_index);
+ afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
+ alist = vc->alist = NULL;
+
afs_put_vlserverlist(vc->cell->net, vc->server_list);
vc->server_list = NULL;
if (vc->flags & AFS_VL_CURSOR_RETRIED)
@@ -173,53 +185,58 @@ restart_from_beginning:
vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
_debug("start");
+ ASSERTCMP(alist, ==, NULL);
if (!afs_start_vl_iteration(vc))
goto failed;
error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
- if (error < 0)
- goto failed_set_error;
+ if (error < 0) {
+ afs_prioritise_error(&vc->cumul_error, error, 0);
+ goto failed;
+ }
pick_server:
- _debug("pick [%lx]", vc->untried);
+ _debug("pick [%lx]", vc->untried_servers);
+ ASSERTCMP(alist, ==, NULL);
- error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
- if (error < 0)
- goto failed_set_error;
+ error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
+ if (error < 0) {
+ afs_prioritise_error(&vc->cumul_error, error, 0);
+ goto failed;
+ }
/* Pick the untried server with the lowest RTT. */
- vc->index = vc->server_list->preferred;
- if (test_bit(vc->index, &vc->untried))
+ vc->server_index = vc->server_list->preferred;
+ if (test_bit(vc->server_index, &vc->untried_servers))
goto selected_server;
- vc->index = -1;
- rtt = U32_MAX;
+ vc->server_index = -1;
+ rtt = UINT_MAX;
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
- if (!test_bit(i, &vc->untried) ||
+ if (!test_bit(i, &vc->untried_servers) ||
!test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
continue;
- if (s->probe.rtt < rtt) {
- vc->index = i;
+ if (s->probe.rtt <= rtt) {
+ vc->server_index = i;
rtt = s->probe.rtt;
}
}
- if (vc->index == -1)
+ if (vc->server_index == -1)
goto no_more_servers;
selected_server:
- _debug("use %d", vc->index);
- __clear_bit(vc->index, &vc->untried);
+ _debug("use %d", vc->server_index);
+ __clear_bit(vc->server_index, &vc->untried_servers);
/* We're starting on a different vlserver from the list. We need to
* check it, find its address list and probe its capabilities before we
* use it.
*/
- ASSERTCMP(vc->ac.alist, ==, NULL);
- vlserver = vc->server_list->servers[vc->index].server;
+ vlserver = vc->server_list->servers[vc->server_index].server;
vc->server = vlserver;
_debug("USING VLSERVER: %s", vlserver->name);
@@ -227,34 +244,48 @@ selected_server:
read_lock(&vlserver->lock);
alist = rcu_dereference_protected(vlserver->addresses,
lockdep_is_held(&vlserver->lock));
- afs_get_addrlist(alist);
+ vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
read_unlock(&vlserver->lock);
- memset(&vc->ac, 0, sizeof(vc->ac));
-
- if (!vc->ac.alist)
- vc->ac.alist = alist;
- else
- afs_put_addrlist(alist);
-
- vc->ac.index = -1;
+ vc->addr_tried = 0;
+ vc->addr_index = -1;
iterate_address:
- ASSERT(vc->ac.alist);
/* Iterate over the current server's address list to try and find an
* address on which it will respond to us.
*/
- if (!afs_iterate_addresses(&vc->ac))
+ set = READ_ONCE(alist->responded);
+ failed = READ_ONCE(alist->probe_failed);
+ vc->addr_index = READ_ONCE(alist->preferred);
+
+ _debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);
+
+ set &= ~(failed | vc->addr_tried);
+
+ if (!set)
goto next_server;
- _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+ if (!test_bit(vc->addr_index, &set))
+ vc->addr_index = __ffs(set);
+
+ set_bit(vc->addr_index, &vc->addr_tried);
+ vc->alist = alist;
- _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
+ _debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);
+
+ vc->call_responded = false;
+ _leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
return true;
next_server:
_debug("next");
- afs_end_cursor(&vc->ac);
+ ASSERT(alist);
+ if (vc->call_responded &&
+ vc->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(alist->preferred, vc->addr_index);
+ afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
+ alist = vc->alist = NULL;
goto pick_server;
no_more_servers:
@@ -264,25 +295,26 @@ no_more_servers:
if (vc->flags & AFS_VL_CURSOR_RETRY)
goto restart_from_beginning;
- e.error = -EDESTADDRREQ;
- e.responded = false;
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
- e.responded = true;
- afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+ vc->cumul_error.responded = true;
+ afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
s->probe.abort_code);
}
- error = e.error;
-
-failed_set_error:
- vc->error = error;
failed:
+ if (alist) {
+ if (vc->call_responded &&
+ vc->addr_index != alist->preferred &&
+ test_bit(alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(alist->preferred, vc->addr_index);
+ afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
+ alist = vc->alist = NULL;
+ }
vc->flags |= AFS_VL_CURSOR_STOP;
- afs_end_cursor(&vc->ac);
- _leave(" = f [failed %d]", vc->error);
+ _leave(" = f [failed %d]", vc->cumul_error.error);
return false;
}
@@ -305,7 +337,10 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
pr_notice("DNS: src=%u st=%u lc=%x\n",
cell->dns_source, cell->dns_status, cell->dns_lookup_count);
pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
- vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
+ vc->untried_servers, vc->server_index, vc->nr_iterations,
+ vc->flags, vc->cumul_error.error);
+ pr_notice("VC: call er=%d ac=%d r=%u\n",
+ vc->call_error, vc->call_abort_code, vc->call_responded);
if (vc->server_list) {
const struct afs_vlserver_list *sl = vc->server_list;
@@ -322,16 +357,14 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
a->nr_ipv4, a->nr_addrs, a->max_addrs,
a->preferred);
pr_notice("VC: - R=%lx F=%lx\n",
- a->responded, a->failed);
- if (a == vc->ac.alist)
+ a->responded, a->probe_failed);
+ if (a == vc->alist)
pr_notice("VC: - current\n");
}
}
}
- pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
- vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
- vc->ac.responded, vc->ac.nr_iterations);
+ pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
rcu_read_unlock();
}
@@ -342,17 +375,25 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
{
struct afs_net *net = vc->cell->net;
- if (vc->error == -EDESTADDRREQ ||
- vc->error == -EADDRNOTAVAIL ||
- vc->error == -ENETUNREACH ||
- vc->error == -EHOSTUNREACH)
+ _enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
+
+ switch (vc->cumul_error.error) {
+ case -EDESTADDRREQ:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
afs_vl_dump_edestaddrreq(vc);
+ break;
+ }
- afs_end_cursor(&vc->ac);
+ if (vc->alist) {
+ if (vc->call_responded &&
+ vc->addr_index != vc->alist->preferred &&
+ test_bit(vc->alist->preferred, &vc->addr_tried))
+ WRITE_ONCE(vc->alist->preferred, vc->addr_index);
+ afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
+ vc->alist = NULL;
+ }
afs_put_vlserverlist(net, vc->server_list);
-
- if (vc->error == -ECONNABORTED)
- vc->error = afs_abort_to_error(vc->ac.abort_code);
-
- return vc->error;
+ return vc->cumul_error.error;
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 00fca3c66ba6..cac75f89b64a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -18,8 +18,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
{
struct afs_uvldbentry__xdr *uvldb;
struct afs_vldb_entry *entry;
- bool new_only = false;
- u32 tmp, nr_servers, vlflags;
+ u32 nr_servers, vlflags;
int i, ret;
_enter("");
@@ -41,27 +40,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
entry->name[i] = 0;
entry->name_len = strlen(entry->name);
- /* If there is a new replication site that we can use, ignore all the
- * sites that aren't marked as new.
- */
- for (i = 0; i < nr_servers; i++) {
- tmp = ntohl(uvldb->serverFlags[i]);
- if (!(tmp & AFS_VLSF_DONTUSE) &&
- (tmp & AFS_VLSF_NEWREPSITE))
- new_only = true;
- }
-
vlflags = ntohl(uvldb->flags);
for (i = 0; i < nr_servers; i++) {
struct afs_uuid__xdr *xdr;
struct afs_uuid *uuid;
+ u32 tmp = ntohl(uvldb->serverFlags[i]);
int j;
int n = entry->nr_servers;
- tmp = ntohl(uvldb->serverFlags[i]);
- if (tmp & AFS_VLSF_DONTUSE ||
- (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
- continue;
if (tmp & AFS_VLSF_RWVOL) {
entry->fs_mask[n] |= AFS_VOL_VTM_RW;
if (vlflags & AFS_VLF_BACKEXISTS)
@@ -82,6 +68,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
for (j = 0; j < 6; j++)
uuid->node[j] = (u8)ntohl(xdr->node[j]);
+ entry->vlsf_flags[n] = tmp;
entry->addr_version[n] = ntohl(uvldb->serverUnique[i]);
entry->nr_servers++;
}
@@ -106,12 +93,6 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
return 0;
}
-static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
-{
- kfree(call->ret_vldb);
- afs_flat_call_destructor(call);
-}
-
/*
* VL.GetEntryByNameU operation type.
*/
@@ -119,7 +100,7 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = {
.name = "VL.GetEntryByNameU",
.op = afs_VL_GetEntryByNameU,
.deliver = afs_deliver_vl_get_entry_by_name_u,
- .destructor = afs_destroy_vl_get_entry_by_name_u,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -155,6 +136,8 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
call->key = vc->key;
call->ret_vldb = entry;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* Marshall the parameters */
bp = call->request;
@@ -165,8 +148,17 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
memset((void *)bp + volnamesz, 0, padsz);
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (struct afs_vldb_entry *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ afs_put_call(call);
+ if (vc->call_error) {
+ kfree(entry);
+ return ERR_PTR(vc->call_error);
+ }
+ return entry;
}
/*
@@ -208,7 +200,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
count = ntohl(*bp);
nentries = min(nentries, count);
- alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT);
+ alist = afs_alloc_addrlist(nentries);
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
@@ -230,9 +222,13 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
alist = call->ret_alist;
bp = call->buffer;
count = min(call->count, 4U);
- for (i = 0; i < count; i++)
- if (alist->nr_addrs < call->count2)
- afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
+ for (i = 0; i < count; i++) {
+ if (alist->nr_addrs < call->count2) {
+ ret = afs_merge_fs_addr4(call->net, alist, *bp++, AFS_FS_PORT);
+ if (ret < 0)
+ return ret;
+ }
+ }
call->count -= count;
if (call->count > 0)
@@ -245,12 +241,6 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
return 0;
}
-static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
-{
- afs_put_addrlist(call->ret_alist);
- return afs_flat_call_destructor(call);
-}
-
/*
* VL.GetAddrsU operation type.
*/
@@ -258,7 +248,7 @@ static const struct afs_call_type afs_RXVLGetAddrsU = {
.name = "VL.GetAddrsU",
.op = afs_VL_GetAddrsU,
.deliver = afs_deliver_vl_get_addrs_u,
- .destructor = afs_vl_get_addrs_u_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -269,6 +259,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
const uuid_t *uuid)
{
struct afs_ListAddrByAttributes__xdr *r;
+ struct afs_addr_list *alist;
const struct afs_uuid *u = (const struct afs_uuid *)uuid;
struct afs_call *call;
struct afs_net *net = vc->cell->net;
@@ -286,6 +277,8 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
call->key = vc->key;
call->ret_alist = NULL;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* Marshall the parameters */
bp = call->request;
@@ -304,8 +297,18 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
r->uuid.node[i] = htonl(u->node[i]);
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ alist = call->ret_alist;
+ afs_put_call(call);
+ if (vc->call_error) {
+ afs_put_addrlist(alist, afs_alist_trace_put_getaddru);
+ return ERR_PTR(vc->call_error);
+ }
+ return alist;
}
/*
@@ -355,6 +358,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
static void afs_destroy_vl_get_capabilities(struct afs_call *call)
{
+ afs_put_addrlist(call->vl_probe, afs_alist_trace_put_vlgetcaps);
afs_put_vlserver(call->net, call->vlserver);
afs_flat_call_destructor(call);
}
@@ -378,7 +382,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
* other end supports.
*/
struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
- struct afs_addr_cursor *ac,
+ struct afs_addr_list *alist,
+ unsigned int addr_index,
struct key *key,
struct afs_vlserver *server,
unsigned int server_index)
@@ -395,6 +400,10 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
call->key = key;
call->vlserver = afs_get_vlserver(server);
call->server_index = server_index;
+ call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer);
+ call->vl_probe = afs_get_addrlist(alist, afs_alist_trace_get_vlgetcaps);
+ call->probe_index = addr_index;
+ call->service_id = server->service_id;
call->upgrade = true;
call->async = true;
call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
@@ -405,7 +414,7 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
/* Can't take a ref on server */
trace_afs_make_vl_call(call);
- afs_make_call(ac, call, GFP_KERNEL);
+ afs_make_call(call, GFP_KERNEL);
return call;
}
@@ -450,7 +459,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (call->count > YFS_MAXENDPOINTS)
return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num);
- alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
+ alist = afs_alloc_addrlist(call->count);
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
@@ -488,14 +497,18 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (ntohl(bp[0]) != sizeof(__be32) * 2)
return afs_protocol_error(
call, afs_eproto_yvl_fsendpt4_len);
- afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
+ ret = afs_merge_fs_addr4(call->net, alist, bp[1], ntohl(bp[2]));
+ if (ret < 0)
+ return ret;
bp += 3;
break;
case YFS_ENDPOINT_IPV6:
if (ntohl(bp[0]) != sizeof(__be32) * 5)
return afs_protocol_error(
call, afs_eproto_yvl_fsendpt6_len);
- afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
+ ret = afs_merge_fs_addr6(call->net, alist, bp + 1, ntohl(bp[5]));
+ if (ret < 0)
+ return ret;
bp += 6;
break;
default:
@@ -610,7 +623,7 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
.name = "YFSVL.GetEndpoints",
.op = afs_YFSVL_GetEndpoints,
.deliver = afs_deliver_yfsvl_get_endpoints,
- .destructor = afs_vl_get_addrs_u_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -620,6 +633,7 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
const uuid_t *uuid)
{
+ struct afs_addr_list *alist;
struct afs_call *call;
struct afs_net *net = vc->cell->net;
__be32 *bp;
@@ -635,6 +649,8 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
call->key = vc->key;
call->ret_alist = NULL;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* Marshall the parameters */
bp = call->request;
@@ -643,8 +659,18 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ alist = call->ret_alist;
+ afs_put_call(call);
+ if (vc->call_error) {
+ afs_put_addrlist(alist, afs_alist_trace_put_getaddru);
+ return ERR_PTR(vc->call_error);
+ }
+ return alist;
}
/*
@@ -709,12 +735,6 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
return 0;
}
-static void afs_destroy_yfsvl_get_cell_name(struct afs_call *call)
-{
- kfree(call->ret_str);
- afs_flat_call_destructor(call);
-}
-
/*
* VL.GetCapabilities operation type
*/
@@ -722,7 +742,7 @@ static const struct afs_call_type afs_YFSVLGetCellName = {
.name = "YFSVL.GetCellName",
.op = afs_YFSVL_GetCellName,
.deliver = afs_deliver_yfsvl_get_cell_name,
- .destructor = afs_destroy_yfsvl_get_cell_name,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -737,6 +757,7 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
struct afs_call *call;
struct afs_net *net = vc->cell->net;
__be32 *bp;
+ char *cellname;
_enter("");
@@ -747,6 +768,8 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
call->key = vc->key;
call->ret_str = NULL;
call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+ call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+ call->service_id = vc->server->service_id;
/* marshall the parameters */
bp = call->request;
@@ -754,6 +777,16 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
/* Can't take a ref on server */
trace_afs_make_vl_call(call);
- afs_make_call(&vc->ac, call, GFP_KERNEL);
- return (char *)afs_wait_for_call_to_complete(call, &vc->ac);
+ afs_make_call(call, GFP_KERNEL);
+ afs_wait_for_call_to_complete(call);
+ vc->call_abort_code = call->abort_code;
+ vc->call_error = call->error;
+ vc->call_responded = call->responded;
+ cellname = call->ret_str;
+ afs_put_call(call);
+ if (vc->call_error) {
+ kfree(cellname);
+ return ERR_PTR(vc->call_error);
+ }
+ return cellname;
}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 115c081a8e2c..020ecd45e476 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -11,6 +11,8 @@
static unsigned __read_mostly afs_volume_record_life = 60 * 60;
+static void afs_destroy_volume(struct work_struct *work);
+
/*
* Insert a volume into a cell. If there's an existing volume record, that is
* returned instead with a ref held.
@@ -72,11 +74,11 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
*/
static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
struct afs_vldb_entry *vldb,
- unsigned long type_mask)
+ struct afs_server_list **_slist)
{
struct afs_server_list *slist;
struct afs_volume *volume;
- int ret = -ENOMEM;
+ int ret = -ENOMEM, i;
volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
if (!volume)
@@ -88,20 +90,30 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
volume->type = params->type;
volume->type_force = params->force;
volume->name_len = vldb->name_len;
+ volume->creation_time = TIME64_MIN;
+ volume->update_time = TIME64_MIN;
refcount_set(&volume->ref, 1);
INIT_HLIST_NODE(&volume->proc_link);
+ INIT_WORK(&volume->destructor, afs_destroy_volume);
rwlock_init(&volume->servers_lock);
+ mutex_init(&volume->volsync_lock);
+ mutex_init(&volume->cb_check_lock);
rwlock_init(&volume->cb_v_break_lock);
+ INIT_LIST_HEAD(&volume->open_mmaps);
+ init_rwsem(&volume->open_mmaps_lock);
memcpy(volume->name, vldb->name, vldb->name_len + 1);
- slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
+ for (i = 0; i < AFS_MAXTYPES; i++)
+ volume->vids[i] = vldb->vid[i];
+
+ slist = afs_alloc_server_list(volume, params->key, vldb);
if (IS_ERR(slist)) {
ret = PTR_ERR(slist);
goto error_1;
}
- refcount_set(&slist->usage, 1);
+ *_slist = slist;
rcu_assign_pointer(volume->servers, slist);
trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
return volume;
@@ -117,18 +129,20 @@ error_0:
* Look up or allocate a volume record.
*/
static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
- struct afs_vldb_entry *vldb,
- unsigned long type_mask)
+ struct afs_vldb_entry *vldb)
{
+ struct afs_server_list *slist;
struct afs_volume *candidate, *volume;
- candidate = afs_alloc_volume(params, vldb, type_mask);
+ candidate = afs_alloc_volume(params, vldb, &slist);
if (IS_ERR(candidate))
return candidate;
volume = afs_insert_volume_into_cell(params->cell, candidate);
- if (volume != candidate)
- afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup);
+ if (volume == candidate)
+ afs_attach_volume_to_servers(volume, slist);
+ else
+ afs_put_volume(candidate, afs_volume_trace_put_cell_dup);
return volume;
}
@@ -208,8 +222,7 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params)
goto error;
}
- type_mask = 1UL << params->type;
- volume = afs_lookup_volume(params, vldb, type_mask);
+ volume = afs_lookup_volume(params, vldb);
error:
kfree(vldb);
@@ -219,16 +232,20 @@ error:
/*
* Destroy a volume record
*/
-static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
+static void afs_destroy_volume(struct work_struct *work)
{
+ struct afs_volume *volume = container_of(work, struct afs_volume, destructor);
+ struct afs_server_list *slist = rcu_access_pointer(volume->servers);
+
_enter("%p", volume);
#ifdef CONFIG_AFS_FSCACHE
ASSERTCMP(volume->cache, ==, NULL);
#endif
+ afs_detach_volume_from_servers(volume, slist);
afs_remove_volume_from_cell(volume);
- afs_put_serverlist(net, rcu_access_pointer(volume->servers));
+ afs_put_serverlist(volume->cell->net, slist);
afs_put_cell(volume->cell, afs_cell_trace_put_vol);
trace_afs_volume(volume->vid, refcount_read(&volume->ref),
afs_volume_trace_free);
@@ -270,8 +287,7 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume,
/*
* Drop a reference on a volume record.
*/
-void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
- enum afs_volume_trace reason)
+void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason)
{
if (volume) {
afs_volid_t vid = volume->vid;
@@ -281,7 +297,7 @@ void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
zero = __refcount_dec_and_test(&volume->ref, &r);
trace_afs_volume(vid, r - 1, reason);
if (zero)
- afs_destroy_volume(net, volume);
+ schedule_work(&volume->destructor);
}
}
@@ -362,8 +378,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
}
/* See if the volume's server list got updated. */
- new = afs_alloc_server_list(volume->cell, key,
- vldb, (1 << volume->type));
+ new = afs_alloc_server_list(volume, key, vldb);
if (IS_ERR(new)) {
ret = PTR_ERR(new);
goto error_vldb;
@@ -382,11 +397,17 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
discard = old;
}
- volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+ /* Check more often if replication is ongoing. */
+ if (new->ro_replicating)
+ volume->update_at = ktime_get_real_seconds() + 10 * 60;
+ else
+ volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
write_unlock(&volume->servers_lock);
- ret = 0;
+ if (discard == old)
+ afs_reattach_volume_to_servers(volume, new, old);
afs_put_serverlist(volume->cell->net, discard);
+ ret = 0;
error_vldb:
kfree(vldb);
error:
diff --git a/fs/afs/write.c b/fs/afs/write.c
index e87b52b1f34c..61d34ad2ca7d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -366,7 +366,7 @@ static void afs_store_data_success(struct afs_operation *op)
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
- if (op->error == 0) {
+ if (!afs_op_error(op)) {
if (!op->store.laundering)
afs_pages_written_back(vnode, op->store.pos, op->store.size);
afs_stat_v(vnode, n_stores);
@@ -428,7 +428,7 @@ try_next_key:
afs_wait_for_operation(op);
- switch (op->error) {
+ switch (afs_op_error(op)) {
case -EACCES:
case -EPERM:
case -ENOKEY:
@@ -447,7 +447,7 @@ try_next_key:
}
afs_put_wb_key(wbk);
- _leave(" = %d", op->error);
+ _leave(" = %d", afs_op_error(op));
return afs_put_operation(op);
}
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 11571cca86c1..f521e66d3bf6 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -245,12 +245,15 @@ static void xdr_decode_YFSVolSync(const __be32 **_bp,
struct afs_volsync *volsync)
{
struct yfs_xdr_YFSVolSync *x = (void *)*_bp;
- u64 creation;
+ u64 creation, update;
if (volsync) {
creation = xdr_to_u64(x->vol_creation_date);
do_div(creation, 10 * 1000 * 1000);
volsync->creation = creation;
+ update = xdr_to_u64(x->vol_update_date);
+ do_div(update, 10 * 1000 * 1000);
+ volsync->update = update;
}
*_bp += xdr_size(x);
@@ -490,6 +493,7 @@ void yfs_fs_fetch_data(struct afs_operation *op)
bp = xdr_encode_u64(bp, req->len);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -572,6 +576,7 @@ void yfs_fs_create_file(struct afs_operation *op)
bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -620,6 +625,7 @@ void yfs_fs_make_dir(struct afs_operation *op)
bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -704,6 +710,7 @@ void yfs_fs_remove_file2(struct afs_operation *op)
bp = xdr_encode_name(bp, name);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -773,6 +780,7 @@ void yfs_fs_remove_file(struct afs_operation *op)
bp = xdr_encode_name(bp, name);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -814,6 +822,7 @@ void yfs_fs_remove_dir(struct afs_operation *op)
bp = xdr_encode_name(bp, name);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -887,6 +896,7 @@ void yfs_fs_link(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call1(call, &vp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -968,6 +978,7 @@ void yfs_fs_symlink(struct afs_operation *op)
bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime);
yfs_check_req(call, bp);
+ call->fid = dvp->fid;
trace_afs_make_fs_call1(call, &dvp->fid, name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1047,6 +1058,7 @@ void yfs_fs_rename(struct afs_operation *op)
bp = xdr_encode_name(bp, new_name);
yfs_check_req(call, bp);
+ call->fid = orig_dvp->fid;
trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1102,6 +1114,7 @@ void yfs_fs_store_data(struct afs_operation *op)
bp = xdr_encode_u64(bp, op->store.i_size);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1158,6 +1171,7 @@ static void yfs_fs_setattr_size(struct afs_operation *op)
bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1196,6 +1210,7 @@ void yfs_fs_setattr(struct afs_operation *op)
bp = xdr_encode_YFS_StoreStatus(bp, attr);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1366,6 +1381,7 @@ void yfs_fs_get_volume_status(struct afs_operation *op)
bp = xdr_encode_u64(bp, vp->fid.vid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1430,6 +1446,7 @@ void yfs_fs_set_lock(struct afs_operation *op)
bp = xdr_encode_u32(bp, op->lock.type);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1460,6 +1477,7 @@ void yfs_fs_extend_lock(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1490,6 +1508,7 @@ void yfs_fs_release_lock(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1556,6 +1575,7 @@ void yfs_fs_fetch_status(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1736,6 +1756,7 @@ void yfs_fs_inline_bulk_status(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &op->more_files[i].fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
@@ -1898,6 +1919,7 @@ void yfs_fs_fetch_opaque_acl(struct afs_operation *op)
bp = xdr_encode_YFSFid(bp, &vp->fid);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
@@ -1948,6 +1970,7 @@ void yfs_fs_store_opaque_acl2(struct afs_operation *op)
bp += size / sizeof(__be32);
yfs_check_req(call, bp);
+ call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_KERNEL);
}
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 5531dd08061e..0754c463224a 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
struct sock;
struct socket;
struct rxrpc_call;
+struct rxrpc_peer;
enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
@@ -41,13 +42,14 @@ void rxrpc_kernel_new_call_notification(struct socket *,
rxrpc_notify_new_call_t,
rxrpc_discard_new_call_t);
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id);
@@ -60,9 +62,14 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, enum rxrpc_abort_reason);
void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call);
void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call);
-void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
- struct sockaddr_rxrpc *);
-bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call);
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer);
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index e9d412d19dbb..5194b7e6dc8d 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -18,97 +18,6 @@
#ifndef __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
-enum afs_call_trace {
- afs_call_trace_alloc,
- afs_call_trace_free,
- afs_call_trace_get,
- afs_call_trace_put,
- afs_call_trace_wake,
- afs_call_trace_work,
-};
-
-enum afs_server_trace {
- afs_server_trace_alloc,
- afs_server_trace_callback,
- afs_server_trace_destroy,
- afs_server_trace_free,
- afs_server_trace_gc,
- afs_server_trace_get_by_addr,
- afs_server_trace_get_by_uuid,
- afs_server_trace_get_caps,
- afs_server_trace_get_install,
- afs_server_trace_get_new_cbi,
- afs_server_trace_get_probe,
- afs_server_trace_give_up_cb,
- afs_server_trace_purging,
- afs_server_trace_put_call,
- afs_server_trace_put_cbi,
- afs_server_trace_put_find_rsq,
- afs_server_trace_put_probe,
- afs_server_trace_put_slist,
- afs_server_trace_put_slist_isort,
- afs_server_trace_put_uuid_rsq,
- afs_server_trace_update,
-};
-
-
-enum afs_volume_trace {
- afs_volume_trace_alloc,
- afs_volume_trace_free,
- afs_volume_trace_get_alloc_sbi,
- afs_volume_trace_get_cell_insert,
- afs_volume_trace_get_new_op,
- afs_volume_trace_get_query_alias,
- afs_volume_trace_put_cell_dup,
- afs_volume_trace_put_cell_root,
- afs_volume_trace_put_destroy_sbi,
- afs_volume_trace_put_free_fc,
- afs_volume_trace_put_put_op,
- afs_volume_trace_put_query_alias,
- afs_volume_trace_put_validate_fc,
- afs_volume_trace_remove,
-};
-
-enum afs_cell_trace {
- afs_cell_trace_alloc,
- afs_cell_trace_free,
- afs_cell_trace_get_queue_dns,
- afs_cell_trace_get_queue_manage,
- afs_cell_trace_get_queue_new,
- afs_cell_trace_get_vol,
- afs_cell_trace_insert,
- afs_cell_trace_manage,
- afs_cell_trace_put_candidate,
- afs_cell_trace_put_destroy,
- afs_cell_trace_put_queue_fail,
- afs_cell_trace_put_queue_work,
- afs_cell_trace_put_vol,
- afs_cell_trace_see_source,
- afs_cell_trace_see_ws,
- afs_cell_trace_unuse_alias,
- afs_cell_trace_unuse_check_alias,
- afs_cell_trace_unuse_delete,
- afs_cell_trace_unuse_fc,
- afs_cell_trace_unuse_lookup,
- afs_cell_trace_unuse_mntpt,
- afs_cell_trace_unuse_no_pin,
- afs_cell_trace_unuse_parse,
- afs_cell_trace_unuse_pin,
- afs_cell_trace_unuse_probe,
- afs_cell_trace_unuse_sbi,
- afs_cell_trace_unuse_ws,
- afs_cell_trace_use_alias,
- afs_cell_trace_use_check_alias,
- afs_cell_trace_use_fc,
- afs_cell_trace_use_fc_alias,
- afs_cell_trace_use_lookup,
- afs_cell_trace_use_mntpt,
- afs_cell_trace_use_pin,
- afs_cell_trace_use_probe,
- afs_cell_trace_use_sbi,
- afs_cell_trace_wait,
-};
-
enum afs_fs_operation {
afs_FS_FetchData = 130, /* AFS Fetch file data */
afs_FS_FetchACL = 131, /* AFS Fetch file ACL */
@@ -202,121 +111,6 @@ enum yfs_cm_operation {
yfs_CB_CallBack = 64204,
};
-enum afs_edit_dir_op {
- afs_edit_dir_create,
- afs_edit_dir_create_error,
- afs_edit_dir_create_inval,
- afs_edit_dir_create_nospc,
- afs_edit_dir_delete,
- afs_edit_dir_delete_error,
- afs_edit_dir_delete_inval,
- afs_edit_dir_delete_noent,
-};
-
-enum afs_edit_dir_reason {
- afs_edit_dir_for_create,
- afs_edit_dir_for_link,
- afs_edit_dir_for_mkdir,
- afs_edit_dir_for_rename_0,
- afs_edit_dir_for_rename_1,
- afs_edit_dir_for_rename_2,
- afs_edit_dir_for_rmdir,
- afs_edit_dir_for_silly_0,
- afs_edit_dir_for_silly_1,
- afs_edit_dir_for_symlink,
- afs_edit_dir_for_unlink,
-};
-
-enum afs_eproto_cause {
- afs_eproto_bad_status,
- afs_eproto_cb_count,
- afs_eproto_cb_fid_count,
- afs_eproto_cellname_len,
- afs_eproto_file_type,
- afs_eproto_ibulkst_cb_count,
- afs_eproto_ibulkst_count,
- afs_eproto_motd_len,
- afs_eproto_offline_msg_len,
- afs_eproto_volname_len,
- afs_eproto_yvl_fsendpt4_len,
- afs_eproto_yvl_fsendpt6_len,
- afs_eproto_yvl_fsendpt_num,
- afs_eproto_yvl_fsendpt_type,
- afs_eproto_yvl_vlendpt4_len,
- afs_eproto_yvl_vlendpt6_len,
- afs_eproto_yvl_vlendpt_type,
-};
-
-enum afs_io_error {
- afs_io_error_cm_reply,
- afs_io_error_extract,
- afs_io_error_fs_probe_fail,
- afs_io_error_vl_lookup_fail,
- afs_io_error_vl_probe_fail,
-};
-
-enum afs_file_error {
- afs_file_error_dir_bad_magic,
- afs_file_error_dir_big,
- afs_file_error_dir_missing_page,
- afs_file_error_dir_name_too_long,
- afs_file_error_dir_over_end,
- afs_file_error_dir_small,
- afs_file_error_dir_unmarked_ext,
- afs_file_error_mntpt,
- afs_file_error_writeback_fail,
-};
-
-enum afs_flock_event {
- afs_flock_acquired,
- afs_flock_callback_break,
- afs_flock_defer_unlock,
- afs_flock_extend_fail,
- afs_flock_fail_other,
- afs_flock_fail_perm,
- afs_flock_no_lockers,
- afs_flock_release_fail,
- afs_flock_silly_delete,
- afs_flock_timestamp,
- afs_flock_try_to_lock,
- afs_flock_vfs_lock,
- afs_flock_vfs_locking,
- afs_flock_waited,
- afs_flock_waiting,
- afs_flock_work_extending,
- afs_flock_work_retry,
- afs_flock_work_unlocking,
- afs_flock_would_block,
-};
-
-enum afs_flock_operation {
- afs_flock_op_copy_lock,
- afs_flock_op_flock,
- afs_flock_op_grant,
- afs_flock_op_lock,
- afs_flock_op_release_lock,
- afs_flock_op_return_ok,
- afs_flock_op_return_eagain,
- afs_flock_op_return_edeadlk,
- afs_flock_op_return_error,
- afs_flock_op_set_lock,
- afs_flock_op_unlock,
- afs_flock_op_wake,
-};
-
-enum afs_cb_break_reason {
- afs_cb_break_no_break,
- afs_cb_break_no_promise,
- afs_cb_break_for_callback,
- afs_cb_break_for_deleted,
- afs_cb_break_for_lapsed,
- afs_cb_break_for_s_reinit,
- afs_cb_break_for_unlink,
- afs_cb_break_for_v_break,
- afs_cb_break_for_volume_callback,
- afs_cb_break_for_zap,
-};
-
#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
/*
@@ -357,9 +151,11 @@ enum afs_cb_break_reason {
EM(afs_volume_trace_alloc, "ALLOC ") \
EM(afs_volume_trace_free, "FREE ") \
EM(afs_volume_trace_get_alloc_sbi, "GET sbi-alloc ") \
+ EM(afs_volume_trace_get_callback, "GET callback ") \
EM(afs_volume_trace_get_cell_insert, "GET cell-insrt") \
EM(afs_volume_trace_get_new_op, "GET op-new ") \
EM(afs_volume_trace_get_query_alias, "GET cell-alias") \
+ EM(afs_volume_trace_put_callback, "PUT callback ") \
EM(afs_volume_trace_put_cell_dup, "PUT cell-dup ") \
EM(afs_volume_trace_put_cell_root, "PUT cell-root ") \
EM(afs_volume_trace_put_destroy_sbi, "PUT sbi-destry") \
@@ -391,6 +187,7 @@ enum afs_cb_break_reason {
EM(afs_cell_trace_unuse_fc, "UNU fc ") \
EM(afs_cell_trace_unuse_lookup, "UNU lookup") \
EM(afs_cell_trace_unuse_mntpt, "UNU mntpt ") \
+ EM(afs_cell_trace_unuse_no_pin, "UNU no-pin") \
EM(afs_cell_trace_unuse_parse, "UNU parse ") \
EM(afs_cell_trace_unuse_pin, "UNU pin ") \
EM(afs_cell_trace_unuse_probe, "UNU probe ") \
@@ -407,6 +204,40 @@ enum afs_cb_break_reason {
EM(afs_cell_trace_use_sbi, "USE sbi ") \
E_(afs_cell_trace_wait, "WAIT ")
+#define afs_alist_traces \
+ EM(afs_alist_trace_alloc, "ALLOC ") \
+ EM(afs_alist_trace_get_estate, "GET estate") \
+ EM(afs_alist_trace_get_vlgetcaps, "GET vgtcap") \
+ EM(afs_alist_trace_get_vlprobe, "GET vprobe") \
+ EM(afs_alist_trace_get_vlrotate_set, "GET vl-rot") \
+ EM(afs_alist_trace_put_estate, "PUT estate") \
+ EM(afs_alist_trace_put_getaddru, "PUT GtAdrU") \
+ EM(afs_alist_trace_put_parse_empty, "PUT p-empt") \
+ EM(afs_alist_trace_put_parse_error, "PUT p-err ") \
+ EM(afs_alist_trace_put_server_dup, "PUT sv-dup") \
+ EM(afs_alist_trace_put_server_oom, "PUT sv-oom") \
+ EM(afs_alist_trace_put_server_update, "PUT sv-upd") \
+ EM(afs_alist_trace_put_vlgetcaps, "PUT vgtcap") \
+ EM(afs_alist_trace_put_vlprobe, "PUT vprobe") \
+ EM(afs_alist_trace_put_vlrotate_end, "PUT vr-end") \
+ EM(afs_alist_trace_put_vlrotate_fail, "PUT vr-fai") \
+ EM(afs_alist_trace_put_vlrotate_next, "PUT vr-nxt") \
+ EM(afs_alist_trace_put_vlrotate_restart,"PUT vr-rst") \
+ EM(afs_alist_trace_put_vlserver, "PUT vlsrvr") \
+ EM(afs_alist_trace_put_vlserver_old, "PUT vs-old") \
+ E_(afs_alist_trace_free, "FREE ")
+
+#define afs_estate_traces \
+ EM(afs_estate_trace_alloc_probe, "ALLOC prob") \
+ EM(afs_estate_trace_alloc_server, "ALLOC srvr") \
+ EM(afs_estate_trace_get_server_state, "GET srv-st") \
+ EM(afs_estate_trace_get_getcaps, "GET getcap") \
+ EM(afs_estate_trace_put_getcaps, "PUT getcap") \
+ EM(afs_estate_trace_put_probe, "PUT probe ") \
+ EM(afs_estate_trace_put_server, "PUT server") \
+ EM(afs_estate_trace_put_server_state, "PUT srv-st") \
+ E_(afs_estate_trace_free, "FREE ")
+
#define afs_fs_operations \
EM(afs_FS_FetchData, "FS.FetchData") \
EM(afs_FS_FetchStatus, "FS.FetchStatus") \
@@ -604,15 +435,67 @@ enum afs_cb_break_reason {
#define afs_cb_break_reasons \
EM(afs_cb_break_no_break, "no-break") \
- EM(afs_cb_break_no_promise, "no-promise") \
EM(afs_cb_break_for_callback, "break-cb") \
+ EM(afs_cb_break_for_creation_regress, "creation-regress") \
EM(afs_cb_break_for_deleted, "break-del") \
- EM(afs_cb_break_for_lapsed, "break-lapsed") \
EM(afs_cb_break_for_s_reinit, "s-reinit") \
EM(afs_cb_break_for_unlink, "break-unlink") \
- EM(afs_cb_break_for_v_break, "break-v") \
+ EM(afs_cb_break_for_update_regress, "update-regress") \
EM(afs_cb_break_for_volume_callback, "break-v-cb") \
- E_(afs_cb_break_for_zap, "break-zap")
+ EM(afs_cb_break_for_vos_release, "break-vos-release") \
+ E_(afs_cb_break_volume_excluded, "vol-excluded")
+
+#define afs_rotate_traces \
+ EM(afs_rotate_trace_aborted, "Abortd") \
+ EM(afs_rotate_trace_busy_sleep, "BsySlp") \
+ EM(afs_rotate_trace_check_vol_status, "VolStt") \
+ EM(afs_rotate_trace_failed, "Failed") \
+ EM(afs_rotate_trace_iter, "Iter ") \
+ EM(afs_rotate_trace_iterate_addr, "ItAddr") \
+ EM(afs_rotate_trace_next_server, "NextSv") \
+ EM(afs_rotate_trace_no_more_servers, "NoMore") \
+ EM(afs_rotate_trace_nomem, "Nomem ") \
+ EM(afs_rotate_trace_probe_error, "PrbErr") \
+ EM(afs_rotate_trace_probe_fileserver, "PrbFsv") \
+ EM(afs_rotate_trace_probe_none, "PrbNon") \
+ EM(afs_rotate_trace_probe_response, "PrbRsp") \
+ EM(afs_rotate_trace_probe_superseded, "PrbSup") \
+ EM(afs_rotate_trace_restart, "Rstart") \
+ EM(afs_rotate_trace_retry_server, "RtrySv") \
+ EM(afs_rotate_trace_selected_server, "SlctSv") \
+ EM(afs_rotate_trace_stale_lock, "StlLck") \
+ EM(afs_rotate_trace_start, "Start ") \
+ EM(afs_rotate_trace_stop, "Stop ") \
+ E_(afs_rotate_trace_stopped, "Stoppd")
+
+/*
+ * Generate enums for tracing information.
+ */
+#ifndef __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY
+#define __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY
+
+#undef EM
+#undef E_
+#define EM(a, b) a,
+#define E_(a, b) a
+
+enum afs_alist_trace { afs_alist_traces } __mode(byte);
+enum afs_call_trace { afs_call_traces } __mode(byte);
+enum afs_cb_break_reason { afs_cb_break_reasons } __mode(byte);
+enum afs_cell_trace { afs_cell_traces } __mode(byte);
+enum afs_edit_dir_op { afs_edit_dir_ops } __mode(byte);
+enum afs_edit_dir_reason { afs_edit_dir_reasons } __mode(byte);
+enum afs_eproto_cause { afs_eproto_causes } __mode(byte);
+enum afs_estate_trace { afs_estate_traces } __mode(byte);
+enum afs_file_error { afs_file_errors } __mode(byte);
+enum afs_flock_event { afs_flock_events } __mode(byte);
+enum afs_flock_operation { afs_flock_operations } __mode(byte);
+enum afs_io_error { afs_io_errors } __mode(byte);
+enum afs_rotate_trace { afs_rotate_traces } __mode(byte);
+enum afs_server_trace { afs_server_traces } __mode(byte);
+enum afs_volume_trace { afs_volume_traces } __mode(byte);
+
+#endif /* end __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY */
/*
* Export enum symbols via userspace.
@@ -622,21 +505,24 @@ enum afs_cb_break_reason {
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
+afs_alist_traces;
afs_call_traces;
-afs_server_traces;
+afs_cb_break_reasons;
afs_cell_traces;
-afs_fs_operations;
-afs_vl_operations;
afs_cm_operations;
-yfs_cm_operations;
afs_edit_dir_ops;
afs_edit_dir_reasons;
afs_eproto_causes;
-afs_io_errors;
+afs_estate_traces;
afs_file_errors;
-afs_flock_types;
afs_flock_operations;
-afs_cb_break_reasons;
+afs_flock_types;
+afs_fs_operations;
+afs_io_errors;
+afs_rotate_traces;
+afs_server_traces;
+afs_vl_operations;
+yfs_cm_operations;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -654,12 +540,12 @@ TRACE_EVENT(afs_receive_data,
TP_ARGS(call, iter, want_more, ret),
TP_STRUCT__entry(
- __field(loff_t, remain )
- __field(unsigned int, call )
- __field(enum afs_call_state, state )
- __field(unsigned short, unmarshall )
- __field(bool, want_more )
- __field(int, ret )
+ __field(loff_t, remain)
+ __field(unsigned int, call)
+ __field(enum afs_call_state, state)
+ __field(unsigned short, unmarshall)
+ __field(bool, want_more)
+ __field(int, ret)
),
TP_fast_assign(
@@ -686,9 +572,9 @@ TRACE_EVENT(afs_notify_call,
TP_ARGS(rxcall, call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_call_state, state )
- __field(unsigned short, unmarshall )
+ __field(unsigned int, call)
+ __field(enum afs_call_state, state)
+ __field(unsigned short, unmarshall)
),
TP_fast_assign(
@@ -708,9 +594,9 @@ TRACE_EVENT(afs_cb_call,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(u32, op )
- __field(u16, service_id )
+ __field(unsigned int, call)
+ __field(u32, op)
+ __field(u16, service_id)
),
TP_fast_assign(
@@ -733,11 +619,11 @@ TRACE_EVENT(afs_call,
TP_ARGS(call_debug_id, op, ref, outstanding, where),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(int, op )
- __field(int, ref )
- __field(int, outstanding )
- __field(const void *, where )
+ __field(unsigned int, call)
+ __field(int, op)
+ __field(int, ref)
+ __field(int, outstanding)
+ __field(const void *, where)
),
TP_fast_assign(
@@ -762,9 +648,9 @@ TRACE_EVENT(afs_make_fs_call,
TP_ARGS(call, fid),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_fs_operation, op )
- __field_struct(struct afs_fid, fid )
+ __field(unsigned int, call)
+ __field(enum afs_fs_operation, op)
+ __field_struct(struct afs_fid, fid)
),
TP_fast_assign(
@@ -794,10 +680,10 @@ TRACE_EVENT(afs_make_fs_calli,
TP_ARGS(call, fid, i),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(unsigned int, i )
- __field(enum afs_fs_operation, op )
- __field_struct(struct afs_fid, fid )
+ __field(unsigned int, call)
+ __field(unsigned int, i)
+ __field(enum afs_fs_operation, op)
+ __field_struct(struct afs_fid, fid)
),
TP_fast_assign(
@@ -829,10 +715,10 @@ TRACE_EVENT(afs_make_fs_call1,
TP_ARGS(call, fid, name),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_fs_operation, op )
- __field_struct(struct afs_fid, fid )
- __array(char, name, 24 )
+ __field(unsigned int, call)
+ __field(enum afs_fs_operation, op)
+ __field_struct(struct afs_fid, fid)
+ __array(char, name, 24)
),
TP_fast_assign(
@@ -866,11 +752,11 @@ TRACE_EVENT(afs_make_fs_call2,
TP_ARGS(call, fid, name, name2),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_fs_operation, op )
- __field_struct(struct afs_fid, fid )
- __array(char, name, 24 )
- __array(char, name2, 24 )
+ __field(unsigned int, call)
+ __field(enum afs_fs_operation, op)
+ __field_struct(struct afs_fid, fid)
+ __array(char, name, 24)
+ __array(char, name2, 24)
),
TP_fast_assign(
@@ -907,8 +793,8 @@ TRACE_EVENT(afs_make_vl_call,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_vl_operation, op )
+ __field(unsigned int, call)
+ __field(enum afs_vl_operation, op)
),
TP_fast_assign(
@@ -927,10 +813,10 @@ TRACE_EVENT(afs_call_done,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(struct rxrpc_call *, rx_call )
- __field(int, ret )
- __field(u32, abort_code )
+ __field(unsigned int, call)
+ __field(struct rxrpc_call *, rx_call)
+ __field(int, ret)
+ __field(u32, abort_code)
),
TP_fast_assign(
@@ -953,10 +839,10 @@ TRACE_EVENT(afs_send_data,
TP_ARGS(call, msg),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(unsigned int, flags )
- __field(loff_t, offset )
- __field(loff_t, count )
+ __field(unsigned int, call)
+ __field(unsigned int, flags)
+ __field(loff_t, offset)
+ __field(loff_t, count)
),
TP_fast_assign(
@@ -977,10 +863,10 @@ TRACE_EVENT(afs_sent_data,
TP_ARGS(call, msg, ret),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(int, ret )
- __field(loff_t, offset )
- __field(loff_t, count )
+ __field(unsigned int, call)
+ __field(int, ret)
+ __field(loff_t, offset)
+ __field(loff_t, count)
),
TP_fast_assign(
@@ -1001,9 +887,9 @@ TRACE_EVENT(afs_dir_check_failed,
TP_ARGS(vnode, off, i_size),
TP_STRUCT__entry(
- __field(struct afs_vnode *, vnode )
- __field(loff_t, off )
- __field(loff_t, i_size )
+ __field(struct afs_vnode *, vnode)
+ __field(loff_t, off)
+ __field(loff_t, i_size)
),
TP_fast_assign(
@@ -1022,11 +908,11 @@ TRACE_EVENT(afs_folio_dirty,
TP_ARGS(vnode, where, folio),
TP_STRUCT__entry(
- __field(struct afs_vnode *, vnode )
- __field(const char *, where )
- __field(pgoff_t, index )
- __field(unsigned long, from )
- __field(unsigned long, to )
+ __field(struct afs_vnode *, vnode)
+ __field(const char *, where)
+ __field(pgoff_t, index)
+ __field(unsigned long, from)
+ __field(unsigned long, to)
),
TP_fast_assign(
@@ -1056,11 +942,11 @@ TRACE_EVENT(afs_call_state,
TP_ARGS(call, from, to, ret, remote_abort),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_call_state, from )
- __field(enum afs_call_state, to )
- __field(int, ret )
- __field(u32, abort )
+ __field(unsigned int, call)
+ __field(enum afs_call_state, from)
+ __field(enum afs_call_state, to)
+ __field(int, ret)
+ __field(u32, abort)
),
TP_fast_assign(
@@ -1084,9 +970,9 @@ TRACE_EVENT(afs_lookup,
TP_ARGS(dvnode, name, fid),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, dfid )
- __field_struct(struct afs_fid, fid )
- __array(char, name, 24 )
+ __field_struct(struct afs_fid, dfid)
+ __field_struct(struct afs_fid, fid)
+ __array(char, name, 24)
),
TP_fast_assign(
@@ -1116,15 +1002,15 @@ TRACE_EVENT(afs_edit_dir,
TP_ARGS(dvnode, why, op, block, slot, f_vnode, f_unique, name),
TP_STRUCT__entry(
- __field(unsigned int, vnode )
- __field(unsigned int, unique )
- __field(enum afs_edit_dir_reason, why )
- __field(enum afs_edit_dir_op, op )
- __field(unsigned int, block )
- __field(unsigned short, slot )
- __field(unsigned int, f_vnode )
- __field(unsigned int, f_unique )
- __array(char, name, 24 )
+ __field(unsigned int, vnode)
+ __field(unsigned int, unique)
+ __field(enum afs_edit_dir_reason, why)
+ __field(enum afs_edit_dir_op, op)
+ __field(unsigned int, block)
+ __field(unsigned short, slot)
+ __field(unsigned int, f_vnode)
+ __field(unsigned int, f_unique)
+ __array(char, name, 24)
),
TP_fast_assign(
@@ -1157,8 +1043,8 @@ TRACE_EVENT(afs_protocol_error,
TP_ARGS(call, cause),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum afs_eproto_cause, cause )
+ __field(unsigned int, call)
+ __field(enum afs_eproto_cause, cause)
),
TP_fast_assign(
@@ -1177,9 +1063,9 @@ TRACE_EVENT(afs_io_error,
TP_ARGS(call, error, where),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(int, error )
- __field(enum afs_io_error, where )
+ __field(unsigned int, call)
+ __field(int, error)
+ __field(enum afs_io_error, where)
),
TP_fast_assign(
@@ -1199,9 +1085,9 @@ TRACE_EVENT(afs_file_error,
TP_ARGS(vnode, error, where),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
- __field(int, error )
- __field(enum afs_file_error, where )
+ __field_struct(struct afs_fid, fid)
+ __field(int, error)
+ __field(enum afs_file_error, where)
),
TP_fast_assign(
@@ -1222,9 +1108,9 @@ TRACE_EVENT(afs_cm_no_server,
TP_ARGS(call, srx),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(unsigned int, op_id )
- __field_struct(struct sockaddr_rxrpc, srx )
+ __field(unsigned int, call)
+ __field(unsigned int, op_id)
+ __field_struct(struct sockaddr_rxrpc, srx)
),
TP_fast_assign(
@@ -1243,9 +1129,9 @@ TRACE_EVENT(afs_cm_no_server_u,
TP_ARGS(call, uuid),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(unsigned int, op_id )
- __field_struct(uuid_t, uuid )
+ __field(unsigned int, call)
+ __field(unsigned int, op_id)
+ __field_struct(uuid_t, uuid)
),
TP_fast_assign(
@@ -1265,11 +1151,11 @@ TRACE_EVENT(afs_flock_ev,
TP_ARGS(vnode, fl, event, error),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
- __field(enum afs_flock_event, event )
- __field(enum afs_lock_state, state )
- __field(int, error )
- __field(unsigned int, debug_id )
+ __field_struct(struct afs_fid, fid)
+ __field(enum afs_flock_event, event)
+ __field(enum afs_lock_state, state)
+ __field(int, error)
+ __field(unsigned int, debug_id)
),
TP_fast_assign(
@@ -1295,13 +1181,13 @@ TRACE_EVENT(afs_flock_op,
TP_ARGS(vnode, fl, op),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
- __field(loff_t, from )
- __field(loff_t, len )
- __field(enum afs_flock_operation, op )
- __field(unsigned char, type )
- __field(unsigned int, flags )
- __field(unsigned int, debug_id )
+ __field_struct(struct afs_fid, fid)
+ __field(loff_t, from)
+ __field(loff_t, len)
+ __field(enum afs_flock_operation, op)
+ __field(unsigned char, type)
+ __field(unsigned int, flags)
+ __field(unsigned int, debug_id)
),
TP_fast_assign(
@@ -1328,7 +1214,7 @@ TRACE_EVENT(afs_reload_dir,
TP_ARGS(vnode),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
+ __field_struct(struct afs_fid, fid)
),
TP_fast_assign(
@@ -1345,8 +1231,8 @@ TRACE_EVENT(afs_silly_rename,
TP_ARGS(vnode, done),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
- __field(bool, done )
+ __field_struct(struct afs_fid, fid)
+ __field(bool, done)
),
TP_fast_assign(
@@ -1365,9 +1251,9 @@ TRACE_EVENT(afs_get_tree,
TP_ARGS(cell, volume),
TP_STRUCT__entry(
- __field(u64, vid )
- __array(char, cell, 24 )
- __array(char, volume, 24 )
+ __field(u64, vid)
+ __array(char, cell, 24)
+ __array(char, volume, 24)
),
TP_fast_assign(
@@ -1385,6 +1271,30 @@ TRACE_EVENT(afs_get_tree,
__entry->cell, __entry->volume, __entry->vid)
);
+/*
+ * Record a callback-break event keyed by volume ID: logs the volume ID, the
+ * cb_v_break value passed in and the reason, the last being decoded through
+ * the afs_cb_break_reasons symbol table.
+ */
+TRACE_EVENT(afs_cb_v_break,
+ TP_PROTO(afs_volid_t vid, unsigned int cb_v_break,
+ enum afs_cb_break_reason reason),
+
+ TP_ARGS(vid, cb_v_break, reason),
+
+ TP_STRUCT__entry(
+ __field(afs_volid_t, vid)
+ __field(unsigned int, cb_v_break)
+ __field(enum afs_cb_break_reason, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->vid = vid;
+ __entry->cb_v_break = cb_v_break;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("%llx vb=%x %s",
+ __entry->vid,
+ __entry->cb_v_break,
+ __print_symbolic(__entry->reason, afs_cb_break_reasons))
+ );
+
TRACE_EVENT(afs_cb_break,
TP_PROTO(struct afs_fid *fid, unsigned int cb_break,
enum afs_cb_break_reason reason, bool skipped),
@@ -1392,10 +1302,10 @@ TRACE_EVENT(afs_cb_break,
TP_ARGS(fid, cb_break, reason, skipped),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
- __field(unsigned int, cb_break )
- __field(enum afs_cb_break_reason, reason )
- __field(bool, skipped )
+ __field_struct(struct afs_fid, fid)
+ __field(unsigned int, cb_break)
+ __field(enum afs_cb_break_reason, reason)
+ __field(bool, skipped)
),
TP_fast_assign(
@@ -1418,8 +1328,8 @@ TRACE_EVENT(afs_cb_miss,
TP_ARGS(fid, reason),
TP_STRUCT__entry(
- __field_struct(struct afs_fid, fid )
- __field(enum afs_cb_break_reason, reason )
+ __field_struct(struct afs_fid, fid)
+ __field(enum afs_cb_break_reason, reason)
),
TP_fast_assign(
@@ -1439,10 +1349,10 @@ TRACE_EVENT(afs_server,
TP_ARGS(server_debug_id, ref, active, reason),
TP_STRUCT__entry(
- __field(unsigned int, server )
- __field(int, ref )
- __field(int, active )
- __field(int, reason )
+ __field(unsigned int, server)
+ __field(int, ref)
+ __field(int, active)
+ __field(int, reason)
),
TP_fast_assign(
@@ -1465,9 +1375,9 @@ TRACE_EVENT(afs_volume,
TP_ARGS(vid, ref, reason),
TP_STRUCT__entry(
- __field(afs_volid_t, vid )
- __field(int, ref )
- __field(enum afs_volume_trace, reason )
+ __field(afs_volid_t, vid)
+ __field(int, ref)
+ __field(enum afs_volume_trace, reason)
),
TP_fast_assign(
@@ -1489,10 +1399,10 @@ TRACE_EVENT(afs_cell,
TP_ARGS(cell_debug_id, ref, active, reason),
TP_STRUCT__entry(
- __field(unsigned int, cell )
- __field(int, ref )
- __field(int, active )
- __field(int, reason )
+ __field(unsigned int, cell)
+ __field(int, ref)
+ __field(int, active)
+ __field(int, reason)
),
TP_fast_assign(
@@ -1509,6 +1419,199 @@ TRACE_EVENT(afs_cell,
__entry->active)
);
+/*
+ * Record an event on an address list: logs the list's debug ID, the ref value
+ * supplied by the caller and the reason (decoded via afs_alist_traces).
+ *
+ * Only fields that TP_fast_assign() actually fills are declared, so that no
+ * uninitialised bytes end up in the trace record.
+ */
+TRACE_EVENT(afs_alist,
+ TP_PROTO(unsigned int alist_debug_id, int ref, enum afs_alist_trace reason),
+
+ TP_ARGS(alist_debug_id, ref, reason),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, alist)
+ __field(int, ref)
+ __field(int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->alist = alist_debug_id;
+ __entry->ref = ref;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("AL=%08x %s r=%d",
+ __entry->alist,
+ __print_symbolic(__entry->reason, afs_alist_traces),
+ __entry->ref)
+ );
+
+/*
+ * Record an event on an endpoint state: logs the owning server's debug ID,
+ * the endpoint state's debug ID, the ref value supplied by the caller and the
+ * reason (decoded via afs_estate_traces).
+ *
+ * Only fields that TP_fast_assign() actually fills are declared, so that no
+ * uninitialised bytes end up in the trace record.
+ */
+TRACE_EVENT(afs_estate,
+ TP_PROTO(unsigned int server_debug_id, unsigned int estate_debug_id,
+ int ref, enum afs_estate_trace reason),
+
+ TP_ARGS(server_debug_id, estate_debug_id, ref, reason),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, server)
+ __field(unsigned int, estate)
+ __field(int, ref)
+ __field(int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->server = server_debug_id;
+ __entry->estate = estate_debug_id;
+ __entry->ref = ref;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("ES=%08x[%x] %s r=%d",
+ __entry->server,
+ __entry->estate,
+ __print_symbolic(__entry->reason, afs_estate_traces),
+ __entry->ref)
+ );
+
+/*
+ * Record a fileserver probe event for one address of an endpoint state.  @tx
+ * selects the "tx"/"rx" tag in the output, the "pq=" value is the endpoint
+ * state's probe_seq, and the remote address is copied from the peer found at
+ * addrs[addr_index] of estate->addresses.
+ *
+ * NOTE(review): addr_index and error are narrowed to u16/short when stored,
+ * and rtt_us is unsigned but printed with %d - confirm both are intended.
+ */
+TRACE_EVENT(afs_fs_probe,
+ TP_PROTO(struct afs_server *server, bool tx, struct afs_endpoint_state *estate,
+ unsigned int addr_index, int error, s32 abort_code, unsigned int rtt_us),
+
+ TP_ARGS(server, tx, estate, addr_index, error, abort_code, rtt_us),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, server)
+ __field(unsigned int, estate)
+ __field(bool, tx)
+ __field(u16, addr_index)
+ __field(short, error)
+ __field(s32, abort_code)
+ __field(unsigned int, rtt_us)
+ __field_struct(struct sockaddr_rxrpc, srx)
+ ),
+
+ TP_fast_assign(
+ struct afs_addr_list *alist = estate->addresses;
+ __entry->server = server->debug_id;
+ __entry->estate = estate->probe_seq;
+ __entry->tx = tx;
+ __entry->addr_index = addr_index;
+ __entry->error = error;
+ __entry->abort_code = abort_code;
+ __entry->rtt_us = rtt_us;
+ /* Snapshot the whole address so the record does not point at the peer. */
+ memcpy(&__entry->srx, rxrpc_kernel_remote_srx(alist->addrs[addr_index].peer),
+ sizeof(__entry->srx));
+ ),
+
+ TP_printk("s=%08x %s pq=%x ax=%u e=%d ac=%d rtt=%d %pISpc",
+ __entry->server, __entry->tx ? "tx" : "rx", __entry->estate,
+ __entry->addr_index, __entry->error, __entry->abort_code, __entry->rtt_us,
+ &__entry->srx.transport)
+ );
+
+/*
+ * Record a VL server probe event for one address of an address list.  @tx
+ * selects the "tx"/"rx" tag in the output and the remote address is copied
+ * from the peer found at alist->addrs[addr_index].
+ *
+ * Only fields that TP_fast_assign() actually fills are declared, so that no
+ * uninitialised bytes end up in the trace record.
+ *
+ * NOTE(review): rtt_us is unsigned but printed with %d - confirm intended.
+ */
+TRACE_EVENT(afs_vl_probe,
+ TP_PROTO(struct afs_vlserver *server, bool tx, struct afs_addr_list *alist,
+ unsigned int addr_index, int error, s32 abort_code, unsigned int rtt_us),
+
+ TP_ARGS(server, tx, alist, addr_index, error, abort_code, rtt_us),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, server)
+ __field(bool, tx)
+ __field(u16, addr_index)
+ __field(short, error)
+ __field(s32, abort_code)
+ __field(unsigned int, rtt_us)
+ __field_struct(struct sockaddr_rxrpc, srx)
+ ),
+
+ TP_fast_assign(
+ __entry->server = server->debug_id;
+ __entry->tx = tx;
+ __entry->addr_index = addr_index;
+ __entry->error = error;
+ __entry->abort_code = abort_code;
+ __entry->rtt_us = rtt_us;
+ memcpy(&__entry->srx, rxrpc_kernel_remote_srx(alist->addrs[addr_index].peer),
+ sizeof(__entry->srx));
+ ),
+
+ TP_printk("vl=%08x %s ax=%u e=%d ac=%d rtt=%d %pISpc",
+ __entry->server, __entry->tx ? "tx" : "rx", __entry->addr_index,
+ __entry->error, __entry->abort_code, __entry->rtt_us,
+ &__entry->srx.transport)
+ );
+
+/*
+ * Record a rotation event for an operation: logs the operation's debug ID,
+ * flags, nr_iterations, server_index and addr_index as read from @op, plus
+ * the reason (decoded via afs_rotate_traces) and a caller-supplied extra
+ * value.
+ *
+ * NOTE(review): extra is unsigned but printed with %d ("ext=") - confirm
+ * whether callers rely on seeing it as a signed value.
+ */
+TRACE_EVENT(afs_rotate,
+ TP_PROTO(struct afs_operation *op, enum afs_rotate_trace reason, unsigned int extra),
+
+ TP_ARGS(op, reason, extra),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, op)
+ __field(unsigned int, flags)
+ __field(unsigned int, extra)
+ __field(unsigned short, iteration)
+ __field(short, server_index)
+ __field(short, addr_index)
+ __field(enum afs_rotate_trace, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->op = op->debug_id;
+ __entry->flags = op->flags;
+ __entry->iteration = op->nr_iterations;
+ __entry->server_index = op->server_index;
+ __entry->addr_index = op->addr_index;
+ __entry->reason = reason;
+ __entry->extra = extra;
+ ),
+
+ TP_printk("OP=%08x it=%02x %s fl=%x sx=%d ax=%d ext=%d",
+ __entry->op,
+ __entry->iteration,
+ __print_symbolic(__entry->reason, afs_rotate_traces),
+ __entry->flags,
+ __entry->server_index,
+ __entry->addr_index,
+ __entry->extra)
+ );
+
+/*
+ * Record the details of a call: its debug ID, operation ID, fid and the
+ * address of call->peer.  The operation name is looked up in
+ * afs_vl_operations when the service ID is VL_SERVICE or YFS_VL_SERVICE and
+ * in afs_fs_operations otherwise.
+ */
+TRACE_EVENT(afs_make_call,
+ TP_PROTO(struct afs_call *call),
+
+ TP_ARGS(call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(bool, is_vl)
+ __field(enum afs_fs_operation, op)
+ __field_struct(struct afs_fid, fid)
+ __field_struct(struct sockaddr_rxrpc, srx)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->op = call->operation_ID;
+ __entry->fid = call->fid;
+ memcpy(&__entry->srx, rxrpc_kernel_remote_srx(call->peer),
+ sizeof(__entry->srx));
+ /* Overwrite the copied service ID with the one this call uses. */
+ __entry->srx.srx_service = call->service_id;
+ __entry->is_vl = (__entry->srx.srx_service == VL_SERVICE ||
+ __entry->srx.srx_service == YFS_VL_SERVICE);
+ ),
+
+ TP_printk("c=%08x %pISpc+%u %s %llx:%llx:%x",
+ __entry->call,
+ &__entry->srx.transport,
+ __entry->srx.srx_service,
+ __entry->is_vl ?
+ __print_symbolic(__entry->op, afs_vl_operations) :
+ __print_symbolic(__entry->op, afs_fs_operations),
+ __entry->fid.vid,
+ __entry->fid.vnode,
+ __entry->fid.unique)
+ );
+
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index f7e537f64db4..4c1ef7b3705c 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -178,7 +178,9 @@
#define rxrpc_peer_traces \
EM(rxrpc_peer_free, "FREE ") \
EM(rxrpc_peer_get_accept, "GET accept ") \
+ EM(rxrpc_peer_get_application, "GET app ") \
EM(rxrpc_peer_get_bundle, "GET bundle ") \
+ EM(rxrpc_peer_get_call, "GET call ") \
EM(rxrpc_peer_get_client_conn, "GET cln-conn") \
EM(rxrpc_peer_get_input, "GET input ") \
EM(rxrpc_peer_get_input_error, "GET inpt-err") \
@@ -187,6 +189,7 @@
EM(rxrpc_peer_get_service_conn, "GET srv-conn") \
EM(rxrpc_peer_new_client, "NEW client ") \
EM(rxrpc_peer_new_prealloc, "NEW prealloc") \
+ EM(rxrpc_peer_put_application, "PUT app ") \
EM(rxrpc_peer_put_bundle, "PUT bundle ") \
EM(rxrpc_peer_put_call, "PUT call ") \
EM(rxrpc_peer_put_conn, "PUT conn ") \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fa8aec78f63d..465bfe5eb061 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -259,15 +259,61 @@ static int rxrpc_listen(struct socket *sock, int backlog)
}
/**
+ * rxrpc_kernel_lookup_peer - Obtain remote transport endpoint for an address
+ * @sock: The socket through which it will be accessed
+ * @srx: The network address
+ * @gfp: Allocation flags
+ *
+ * Lookup or create a remote transport endpoint record for the specified
+ * address and return it with a ref held.
+ *
+ * Return: The result of rxrpc_lookup_peer(), or an ERR_PTR() carrying the
+ * error from rxrpc_validate_address() if the address is rejected.
+ * NOTE(review): rxrpc_lookup_peer() looks like it returns NULL rather than an
+ * ERR_PTR() on failure, so callers probably need to check for both - confirm.
+ */
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ return rxrpc_lookup_peer(rx->local, srx, gfp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_lookup_peer);
+
+/**
+ * rxrpc_kernel_get_peer - Get a reference on a peer
+ * @peer: The peer to get a reference on.
+ *
+ * Take an application ref on @peer and return it; NULL is passed through.
+ */
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer)
+{
+ return peer ? rxrpc_get_peer(peer, rxrpc_peer_get_application) : NULL;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference
+ * @peer: The peer to drop a ref on
+ *
+ * Drops the ref via rxrpc_put_peer(), traced as rxrpc_peer_put_application.
+ * NOTE(review): unlike rxrpc_kernel_get_peer(), @peer is not NULL-checked
+ * here; presumably rxrpc_put_peer() copes with NULL - confirm.
+ */
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer)
+{
+ rxrpc_put_peer(peer, rxrpc_peer_put_application);
+}
+EXPORT_SYMBOL(rxrpc_kernel_put_peer);
+
+/**
* rxrpc_kernel_begin_call - Allow a kernel service to begin a call
* @sock: The socket on which to make the call
- * @srx: The address of the peer to contact
+ * @peer: The peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @hard_timeout: The maximum lifespan of the call in sec
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @service_id: The ID of the service to contact
* @upgrade: Request service upgrade for call
* @interruptibility: The call is interruptible, or can be canceled.
* @debug_id: The debug ID for tracing to be assigned to the call
@@ -280,13 +326,14 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* supplying @srx and @key.
*/
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id)
@@ -295,13 +342,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct rxrpc_call_params p;
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- int ret;
_enter(",,%x,%lx", key_serial(key), user_call_ID);
- ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
- if (ret < 0)
- return ERR_PTR(ret);
+ if (WARN_ON_ONCE(peer->local != rx->local))
+ return ERR_PTR(-EIO);
lock_sock(&rx->sk);
@@ -319,12 +364,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
+ cp.peer = peer;
cp.key = key;
cp.security_level = rx->min_sec_level;
cp.exclusive = false;
cp.upgrade = upgrade;
- cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id);
+ cp.service_id = service_id;
+ call = rxrpc_new_client_call(rx, &cp, &p, gfp, debug_id);
/* The socket has been unlocked. */
if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e8e14c6f904d..2f8b39a614c3 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -68,6 +68,7 @@ struct rxrpc_net {
atomic_t nr_calls; /* Count of allocated calls */
atomic_t nr_conns;
+ struct list_head bundle_proc_list; /* List of bundles for proc */
struct list_head conn_proc_list; /* List of conns in this namespace for proc */
struct list_head service_conns; /* Service conns in this namespace */
rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
@@ -364,6 +365,7 @@ struct rxrpc_conn_proto {
struct rxrpc_conn_parameters {
struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Representation of remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
@@ -431,6 +433,7 @@ struct rxrpc_bundle {
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
+ struct list_head proc_link; /* Link in net->bundle_proc_list */
const struct rxrpc_security *security; /* applied security module */
refcount_t ref;
atomic_t active; /* Number of active users */
@@ -444,6 +447,7 @@ struct rxrpc_bundle {
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
unsigned long avail_chans; /* Mask of available channels */
+ unsigned int conn_ids[4]; /* Connection IDs. */
struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */
};
@@ -867,7 +871,6 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long
struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *,
struct rxrpc_call_params *, gfp_t,
unsigned int);
void rxrpc_start_call_timer(struct rxrpc_call *call);
@@ -1167,6 +1170,7 @@ void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
*/
extern const struct seq_operations rxrpc_call_seq_ops;
extern const struct seq_operations rxrpc_connection_seq_ops;
+extern const struct seq_operations rxrpc_bundle_seq_ops;
extern const struct seq_operations rxrpc_peer_seq_ops;
extern const struct seq_operations rxrpc_local_seq_ops;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 773eecd1e979..beea25ac88f5 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -193,7 +193,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
* Allocate a new client call.
*/
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
- struct sockaddr_rxrpc *srx,
struct rxrpc_conn_parameters *cp,
struct rxrpc_call_params *p,
gfp_t gfp,
@@ -211,10 +210,12 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
- call->dest_srx = *srx;
+ call->dest_srx = cp->peer->srx;
+ call->dest_srx.srx_service = cp->service_id;
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
call->key = key_get(cp->key);
+ call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call);
call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call);
call->security_level = cp->security_level;
if (p->kernel)
@@ -306,10 +307,6 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
- call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
- if (!call->peer)
- goto error;
-
ret = rxrpc_look_up_bundle(call, gfp);
if (ret < 0)
goto error;
@@ -334,7 +331,6 @@ error:
*/
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
struct rxrpc_call_params *p,
gfp_t gfp,
unsigned int debug_id)
@@ -349,13 +345,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);
+ if (WARN_ON_ONCE(!cp->peer)) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-EIO);
+ }
+
limiter = rxrpc_get_call_slot(p, gfp);
if (!limiter) {
release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
}
- call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id);
+ call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id);
if (IS_ERR(call)) {
release_sock(&rx->sk);
up(limiter);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 1d95f8bc769f..3b9b267a4431 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -91,6 +91,10 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
atomic_set(&bundle->active, 1);
INIT_LIST_HEAD(&bundle->waiting_calls);
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
+
+ write_lock(&bundle->local->rxnet->conn_lock);
+ list_add_tail(&bundle->proc_link, &bundle->local->rxnet->bundle_proc_list);
+ write_unlock(&bundle->local->rxnet->conn_lock);
}
return bundle;
}
@@ -109,6 +113,9 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref),
rxrpc_bundle_free);
+ write_lock(&bundle->local->rxnet->conn_lock);
+ list_del(&bundle->proc_link);
+ write_unlock(&bundle->local->rxnet->conn_lock);
rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
key_put(bundle->key);
kfree(bundle);
@@ -338,6 +345,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
old = bundle->conns[slot];
if (old) {
bundle->conns[slot] = NULL;
+ bundle->conn_ids[slot] = 0;
trace_rxrpc_client(old, -1, rxrpc_client_replace);
rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
}
@@ -351,6 +359,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
rxrpc_activate_bundle(bundle);
conn->bundle_shift = shift;
bundle->conns[slot] = conn;
+ bundle->conn_ids[slot] = conn->debug_id;
for (i = 0; i < RXRPC_MAXCALLS; i++)
set_bit(shift + i, &bundle->avail_chans);
return true;
@@ -671,6 +680,7 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (bundle->conns[bindex] == conn) {
_debug("clear slot %u", bindex);
bundle->conns[bindex] = NULL;
+ bundle->conn_ids[bindex] = 0;
for (i = 0; i < RXRPC_MAXCALLS; i++)
clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
rxrpc_put_client_connection_id(bundle->local, conn);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 89ac05a711a4..39c908a3ca6e 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -25,7 +25,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
struct rxrpc_conn_proto k;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rb_node *p;
- unsigned int seq = 0;
+ unsigned int seq = 1;
k.epoch = sp->hdr.epoch;
k.cid = sp->hdr.cid & RXRPC_CIDMASK;
@@ -35,6 +35,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
* under just the RCU read lock, so we have to check for
* changes.
*/
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
p = rcu_dereference_raw(peer->service_conns.rb_node);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index a0319c040c25..a4c135d0fbcc 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -45,6 +45,7 @@ static __net_init int rxrpc_init_net(struct net *net)
atomic_set(&rxnet->nr_calls, 1);
atomic_set(&rxnet->nr_conns, 1);
+ INIT_LIST_HEAD(&rxnet->bundle_proc_list);
INIT_LIST_HEAD(&rxnet->conn_proc_list);
INIT_LIST_HEAD(&rxnet->service_conns);
rwlock_init(&rxnet->conn_lock);
@@ -78,6 +79,9 @@ static __net_init int rxrpc_init_net(struct net *net)
proc_create_net("conns", 0444, rxnet->proc_net,
&rxrpc_connection_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net("bundles", 0444, rxnet->proc_net,
+ &rxrpc_bundle_seq_ops,
+ sizeof(struct seq_net_private));
proc_create_net("peers", 0444, rxnet->proc_net,
&rxrpc_peer_seq_ops,
sizeof(struct seq_net_private));
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 8d7a715a0bb1..49dcda67a0d5 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -22,6 +22,8 @@
#include <net/ip6_route.h>
#include "ar-internal.h"
+static const struct sockaddr_rxrpc rxrpc_null_addr;
+
/*
* Hash a peer key.
*/
@@ -457,39 +459,53 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
}
/**
- * rxrpc_kernel_get_peer - Get the peer address of a call
+ * rxrpc_kernel_get_call_peer - Get the peer record of a call
* @sock: The socket on which the call is in progress.
* @call: The call to query
- * @_srx: Where to place the result
*
- * Get the address of the remote peer in a call.
+ * Return the call's peer record; no extra ref is taken and @sock is unused.
*/
-void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
- struct sockaddr_rxrpc *_srx)
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call)
{
- *_srx = call->peer->srx;
+ return call->peer;
}
-EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+EXPORT_SYMBOL(rxrpc_kernel_get_call_peer);
/**
* rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
- * @sock: The socket on which the call is in progress.
- * @call: The call to query
- * @_srtt: Where to store the SRTT value.
+ * @peer: The peer to query
*
- * Get the call's peer smoothed RTT in uS.
+ * Get the peer's smoothed RTT in uS (srtt_us >> 3), or UINT_MAX if there are
+ * no RTT samples.  @peer is dereferenced unconditionally, so must not be NULL.
*/
-bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call,
- u32 *_srtt)
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer)
{
- struct rxrpc_peer *peer = call->peer;
+ return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
- if (peer->rtt_count == 0) {
- *_srtt = 1000000; /* 1S */
- return false;
- }
+/**
+ * rxrpc_kernel_remote_srx - Get the address of a peer
+ * @peer: The peer to query
+ *
+ * Get a pointer to the address from a peer record. The caller is responsible
+ * for making sure that the address is not deallocated.
+ *
+ * If @peer is NULL, a pointer to the static rxrpc_null_addr is returned
+ * instead, so the result is never NULL.
+ */
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer)
+{
+ return peer ? &peer->srx : &rxrpc_null_addr;
+}
+EXPORT_SYMBOL(rxrpc_kernel_remote_srx);
- *_srtt = call->peer->srtt_us >> 3;
- return true;
+/**
+ * rxrpc_kernel_remote_addr - Get the transport address of a peer
+ * @peer: The peer to query
+ *
+ * Get a pointer to the transport address from a peer record. The caller is
+ * responsible for making sure that the address is not deallocated.
+ *
+ * If @peer is NULL, the transport part of the static rxrpc_null_addr is
+ * returned instead, so the result is never NULL.
+ */
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer)
+{
+ return (const struct sockaddr *)
+ (peer ? &peer->srx.transport : &rxrpc_null_addr.transport);
}
-EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
+EXPORT_SYMBOL(rxrpc_kernel_remote_addr);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 682636d3b060..6c86cbb98d1d 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -199,6 +199,82 @@ const struct seq_operations rxrpc_connection_seq_ops = {
};
/*
+ * generate a list of extant virtual bundles in /proc/net/rxrpc/bundles
+ *
+ * Start of iteration: take the namespace's conn_lock for reading and position
+ * at *_pos in bundle_proc_list.  The list head itself can be handed to the
+ * show op, which uses it as the cue to print the column header.
+ */
+static void *rxrpc_bundle_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_lock(&rxnet->conn_lock);
+ return seq_list_start_head(&rxnet->bundle_proc_list, *_pos);
+}
+
+/* Step the iterator on to the next entry of the namespace's bundle_proc_list. */
+static void *rxrpc_bundle_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next(v, &rxnet->bundle_proc_list, pos);
+}
+
+/* End of iteration: release the read lock on the namespace's conn_lock. */
+static void rxrpc_bundle_seq_stop(struct seq_file *seq, void *v)
+ __releases(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_unlock(&rxnet->conn_lock);
+}
+
+/*
+ * Emit one line of /proc/net/rxrpc/bundles: the column header when handed the
+ * list head, otherwise the details of the bundle that @v is linked into.
+ * Always returns 0.
+ */
+static int rxrpc_bundle_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_bundle *bundle;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ char lbuff[50], rbuff[50];
+
+ if (v == &rxnet->bundle_proc_list) {
+ seq_puts(seq,
+ "Proto Local "
+ " Remote "
+ " SvID Ref Act Flg Key |"
+ " Bundle Conn_0 Conn_1 Conn_2 Conn_3\n"
+ );
+ return 0;
+ }
+
+ bundle = list_entry(v, struct rxrpc_bundle, proc_link);
+
+ /* Bound the address formatting by the size of the on-stack buffers. */
+ snprintf(lbuff, sizeof(lbuff), "%pISpc", &bundle->local->srx.transport);
+ snprintf(rbuff, sizeof(rbuff), "%pISpc", &bundle->peer->srx.transport);
+ seq_printf(seq,
+ "UDP %-47.47s %-47.47s %4x %3u %3d"
+ " %c%c%c %08x | %08x %08x %08x %08x %08x\n",
+ lbuff,
+ rbuff,
+ bundle->service_id,
+ refcount_read(&bundle->ref),
+ atomic_read(&bundle->active),
+ bundle->try_upgrade ? 'U' : '-',
+ bundle->exclusive ? 'e' : '-',
+ bundle->upgrade ? 'u' : '-',
+ key_serial(bundle->key),
+ bundle->debug_id,
+ bundle->conn_ids[0],
+ bundle->conn_ids[1],
+ bundle->conn_ids[2],
+ bundle->conn_ids[3]);
+
+ return 0;
+}
+
+/* seq_file operations for the bundle list, wiring up the four ops above. */
+const struct seq_operations rxrpc_bundle_seq_ops = {
+ .start = rxrpc_bundle_seq_start,
+ .next = rxrpc_bundle_seq_next,
+ .stop = rxrpc_bundle_seq_stop,
+ .show = rxrpc_bundle_seq_show,
+};
+
+/*
* generate a list of extant virtual peers in /proc/net/rxrpc/peers
*/
static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 8e0b94714e84..5677d5690a02 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -572,6 +572,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
__acquires(&call->user_mutex)
{
struct rxrpc_conn_parameters cp;
+ struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct key *key;
@@ -584,21 +585,29 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
return ERR_PTR(-EDESTADDRREQ);
}
+ peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL);
+ if (!peer) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-ENOMEM);
+ }
+
key = rx->key;
if (key && !rx->key->payload.data[0])
key = NULL;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
+ cp.peer = peer;
cp.key = rx->key;
cp.security_level = rx->min_sec_level;
cp.exclusive = rx->exclusive | p->exclusive;
cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL,
+ call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
+ rxrpc_put_peer(peer, rxrpc_peer_put_application);
_leave(" = %p\n", call);
return call;
}