summaryrefslogtreecommitdiff
path: root/fs/afs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/afs')
-rw-r--r--fs/afs/Kconfig1
-rw-r--r--fs/afs/Makefile2
-rw-r--r--fs/afs/addr_list.c50
-rw-r--r--fs/afs/addr_prefs.c6
-rw-r--r--fs/afs/afs.h2
-rw-r--r--fs/afs/afs_vl.h10
-rw-r--r--fs/afs/callback.c4
-rw-r--r--fs/afs/cell.c467
-rw-r--r--fs/afs/cm_security.c340
-rw-r--r--fs/afs/cmservice.c82
-rw-r--r--fs/afs/dir.c889
-rw-r--r--fs/afs/dir_edit.c436
-rw-r--r--fs/afs/dir_search.c227
-rw-r--r--fs/afs/dir_silly.c6
-rw-r--r--fs/afs/dynroot.c488
-rw-r--r--fs/afs/file.c264
-rw-r--r--fs/afs/fs_operation.c115
-rw-r--r--fs/afs/fs_probe.c38
-rw-r--r--fs/afs/fsclient.c65
-rw-r--r--fs/afs/inode.c156
-rw-r--r--fs/afs/internal.h273
-rw-r--r--fs/afs/main.c19
-rw-r--r--fs/afs/misc.c27
-rw-r--r--fs/afs/mntpt.c33
-rw-r--r--fs/afs/proc.c25
-rw-r--r--fs/afs/rotate.c23
-rw-r--r--fs/afs/rxrpc.c160
-rw-r--r--fs/afs/server.c604
-rw-r--r--fs/afs/server_list.c6
-rw-r--r--fs/afs/super.c29
-rw-r--r--fs/afs/validation.c35
-rw-r--r--fs/afs/vl_alias.c13
-rw-r--r--fs/afs/vl_rotate.c2
-rw-r--r--fs/afs/vlclient.c3
-rw-r--r--fs/afs/volume.c15
-rw-r--r--fs/afs/write.c206
-rw-r--r--fs/afs/xdr_fs.h2
-rw-r--r--fs/afs/yfsclient.c53
38 files changed, 3173 insertions, 2003 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index fc8ba9142f2f..682bd8ec2c10 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -5,6 +5,7 @@ config AFS_FS
select AF_RXRPC
select DNS_RESOLVER
select NETFS_SUPPORT
+ select CRYPTO_KRB5
help
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index dcdc0f1bb76f..b49b8fe682f3 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -8,9 +8,11 @@ kafs-y := \
addr_prefs.o \
callback.o \
cell.o \
+ cm_security.o \
cmservice.o \
dir.o \
dir_edit.o \
+ dir_search.o \
dir_silly.o \
dynroot.o \
file.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 6d42f85c6be5..e941da5b6dd9 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -362,3 +362,53 @@ int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist,
alist->nr_addrs++;
return 0;
}
+
+/*
+ * Set the app data on the rxrpc peers an address list points to
+ */
+void afs_set_peer_appdata(struct afs_server *server,
+ struct afs_addr_list *old_alist,
+ struct afs_addr_list *new_alist)
+{
+ unsigned long data = (unsigned long)server;
+ int n = 0, o = 0;
+
+ if (!old_alist) {
+ /* New server. Just set all. */
+ for (; n < new_alist->nr_addrs; n++)
+ rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data);
+ return;
+ }
+ if (!new_alist) {
+ /* Dead server. Just remove all. */
+ for (; o < old_alist->nr_addrs; o++)
+ rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0);
+ return;
+ }
+
+ /* Walk through the two lists simultaneously, setting new peers and
+ * clearing old ones. The two lists are ordered by pointer to peer
+ * record.
+ */
+ while (n < new_alist->nr_addrs && o < old_alist->nr_addrs) {
+ struct rxrpc_peer *pn = new_alist->addrs[n].peer;
+ struct rxrpc_peer *po = old_alist->addrs[o].peer;
+
+ if (pn == po)
+ continue;
+ if (pn < po) {
+ rxrpc_kernel_set_peer_data(pn, data);
+ n++;
+ } else {
+ rxrpc_kernel_set_peer_data(po, 0);
+ o++;
+ }
+ }
+
+ if (n < new_alist->nr_addrs)
+ for (; n < new_alist->nr_addrs; n++)
+ rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data);
+ if (o < old_alist->nr_addrs)
+ for (; o < old_alist->nr_addrs; o++)
+ rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0);
+}
diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c
index a189ff8a5034..c0384201b8fe 100644
--- a/fs/afs/addr_prefs.c
+++ b/fs/afs/addr_prefs.c
@@ -413,8 +413,10 @@ int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size)
do {
argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv));
- if (argc < 0)
- return argc;
+ if (argc < 0) {
+ ret = argc;
+ goto done;
+ }
if (argc < 2)
goto inval;
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index b488072aee87..ec3db00bd081 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -10,7 +10,7 @@
#include <linux/in.h>
-#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */
+#define AFS_MAXCELLNAME 253 /* Maximum length of a cell name (DNS limited) */
#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */
#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */
#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 9c65ffb8a523..b835e25a2c02 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -13,6 +13,7 @@
#define AFS_VL_PORT 7003 /* volume location service port */
#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
+#define YFS_VL_MAXCELLNAME 256 /* Maximum length of a cell name in YFS protocol */
enum AFSVL_Operations {
VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
@@ -134,13 +135,4 @@ struct afs_uvldbentry__xdr {
__be32 spares9;
};
-struct afs_address_list {
- refcount_t usage;
- unsigned int version;
- unsigned int nr_addrs;
- struct sockaddr_rxrpc addrs[];
-};
-
-extern void afs_put_address_list(struct afs_address_list *alist);
-
#endif /* AFS_VL_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 99b2c8172021..69e1dd55b160 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -41,7 +41,7 @@ static void afs_volume_init_callback(struct afs_volume *volume)
list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
queue_work(system_unbound_wq, &vnode->cb_work);
}
}
@@ -79,7 +79,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
_enter("");
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) {
+ if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) {
vnode->cb_break++;
vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
afs_clear_permits(vnode);
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index caa09875f520..0168bbf53fe0 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,8 +20,9 @@ static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
static atomic_t cell_debug_id;
-static void afs_queue_cell_manager(struct afs_net *);
-static void afs_manage_cell_work(struct work_struct *);
+static void afs_cell_timer(struct timer_list *timer);
+static void afs_destroy_cell_work(struct work_struct *work);
+static void afs_manage_cell_work(struct work_struct *work);
static void afs_dec_cells_outstanding(struct afs_net *net)
{
@@ -29,19 +30,11 @@ static void afs_dec_cells_outstanding(struct afs_net *net)
wake_up_var(&net->cells_outstanding);
}
-/*
- * Set the cell timer to fire after a given delay, assuming it's not already
- * set for an earlier time.
- */
-static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
+static void afs_set_cell_state(struct afs_cell *cell, enum afs_cell_state state)
{
- if (net->live) {
- atomic_inc(&net->cells_outstanding);
- if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
- afs_dec_cells_outstanding(net);
- } else {
- afs_queue_cell_manager(net);
- }
+ smp_store_release(&cell->state, state); /* Commit cell changes before state */
+ smp_wmb(); /* Set cell state before task state */
+ wake_up_var(&cell->state);
}
/*
@@ -64,7 +57,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
return ERR_PTR(-ENAMETOOLONG);
if (!name) {
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell,
+ lockdep_is_held(&net->cells_lock));
if (!cell)
return ERR_PTR(-EDESTADDRREQ);
goto found;
@@ -115,7 +109,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
const char *name, unsigned int namelen,
const char *addresses)
{
- struct afs_vlserver_list *vllist;
+ struct afs_vlserver_list *vllist = NULL;
struct afs_cell *cell;
int i, ret;
@@ -146,27 +140,31 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
return ERR_PTR(-ENOMEM);
}
- cell->name = kmalloc(namelen + 1, GFP_KERNEL);
+ cell->name = kmalloc(1 + namelen + 1, GFP_KERNEL);
if (!cell->name) {
kfree(cell);
return ERR_PTR(-ENOMEM);
}
- cell->net = net;
+ cell->name[0] = '.';
+ cell->name++;
cell->name_len = namelen;
for (i = 0; i < namelen; i++)
cell->name[i] = tolower(name[i]);
cell->name[i] = 0;
+ cell->net = net;
refcount_set(&cell->ref, 1);
atomic_set(&cell->active, 0);
+ INIT_WORK(&cell->destroyer, afs_destroy_cell_work);
INIT_WORK(&cell->manager, afs_manage_cell_work);
+ timer_setup(&cell->management_timer, afs_cell_timer, 0);
init_rwsem(&cell->vs_lock);
cell->volumes = RB_ROOT;
INIT_HLIST_HEAD(&cell->proc_volumes);
seqlock_init(&cell->volume_lock);
cell->fs_servers = RB_ROOT;
- seqlock_init(&cell->fs_lock);
+ init_rwsem(&cell->fs_lock);
rwlock_init(&cell->vl_servers_lock);
cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
@@ -201,7 +199,13 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
cell->dns_status = vllist->status;
smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
atomic_inc(&net->cells_outstanding);
+ ret = idr_alloc_cyclic(&net->cells_dyn_ino, cell,
+ 2, INT_MAX / 2, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
+ cell->dynroot_ino = ret;
cell->debug_id = atomic_inc_return(&cell_debug_id);
+
trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc);
_leave(" = %p", cell);
@@ -211,7 +215,8 @@ parse_failed:
if (ret == -EINVAL)
printk(KERN_ERR "kAFS: bad VL server IP address\n");
error:
- kfree(cell->name);
+ afs_put_vlserverlist(cell->net, vllist);
+ kfree(cell->name - 1);
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -224,6 +229,7 @@ error:
* @namesz: The strlen of the cell name.
* @vllist: A colon/comma separated list of numeric IP addresses or NULL.
* @excl: T if an error should be given if the cell name already exists.
+ * @trace: The reason to be logged if the lookup is successful.
*
* Look up a cell record by name and query the DNS for VL server addresses if
* needed. Note that that actual DNS query is punted off to the manager thread
@@ -232,7 +238,8 @@ error:
*/
struct afs_cell *afs_lookup_cell(struct afs_net *net,
const char *name, unsigned int namesz,
- const char *vllist, bool excl)
+ const char *vllist, bool excl,
+ enum afs_cell_trace trace)
{
struct afs_cell *cell, *candidate, *cursor;
struct rb_node *parent, **pp;
@@ -242,7 +249,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
_enter("%s,%s", name, vllist);
if (!excl) {
- cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup);
+ cell = afs_find_cell(net, name, namesz, trace);
if (!IS_ERR(cell))
goto wait_for_cell;
}
@@ -285,26 +292,28 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
cell = candidate;
candidate = NULL;
- atomic_set(&cell->active, 2);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert);
+ afs_use_cell(cell, trace);
rb_link_node_rcu(&cell->net_node, parent, pp);
rb_insert_color(&cell->net_node, &net->cells);
up_write(&net->cells_lock);
- afs_queue_cell(cell, afs_cell_trace_get_queue_new);
+ afs_queue_cell(cell, afs_cell_trace_queue_new);
wait_for_cell:
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active),
- afs_cell_trace_wait);
_debug("wait_for_cell");
- wait_var_event(&cell->state,
- ({
- state = smp_load_acquire(&cell->state); /* vs error */
- state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED;
- }));
+ state = smp_load_acquire(&cell->state); /* vs error */
+ if (state != AFS_CELL_ACTIVE &&
+ state != AFS_CELL_DEAD) {
+ afs_see_cell(cell, afs_cell_trace_wait);
+ wait_var_event(&cell->state,
+ ({
+ state = smp_load_acquire(&cell->state); /* vs error */
+ state == AFS_CELL_ACTIVE || state == AFS_CELL_DEAD;
+ }));
+ }
/* Check the state obtained from the wait check. */
- if (state == AFS_CELL_REMOVED) {
+ if (state == AFS_CELL_DEAD) {
ret = cell->error;
goto error;
}
@@ -318,7 +327,7 @@ cell_already_exists:
if (excl) {
ret = -EEXIST;
} else {
- afs_use_cell(cursor, afs_cell_trace_use_lookup);
+ afs_use_cell(cursor, trace);
ret = 0;
}
up_write(&net->cells_lock);
@@ -328,7 +337,7 @@ cell_already_exists:
goto wait_for_cell;
goto error_noput;
error:
- afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup);
+ afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_error);
error_noput:
_leave(" = %d [error]", ret);
return ERR_PTR(ret);
@@ -365,8 +374,17 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
len = cp - rootcell;
}
- /* allocate a cell record for the root cell */
- new_root = afs_lookup_cell(net, rootcell, len, vllist, false);
+ if (len == 0 || !rootcell[0] || rootcell[0] == '.' || rootcell[len - 1] == '.')
+ return -EINVAL;
+ if (memchr(rootcell, '/', len))
+ return -EINVAL;
+ cp = strstr(rootcell, "..");
+ if (cp && cp < rootcell + len)
+ return -EINVAL;
+
+ /* allocate a cell record for the root/workstation cell */
+ new_root = afs_lookup_cell(net, rootcell, len, vllist, false,
+ afs_cell_trace_use_lookup_ws);
if (IS_ERR(new_root)) {
_leave(" = %ld", PTR_ERR(new_root));
return PTR_ERR(new_root);
@@ -377,12 +395,11 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
/* install the new cell */
down_write(&net->cells_lock);
- afs_see_cell(new_root, afs_cell_trace_see_ws);
- old_root = net->ws_cell;
- net->ws_cell = new_root;
+ old_root = rcu_replace_pointer(net->ws_cell, new_root,
+ lockdep_is_held(&net->cells_lock));
up_write(&net->cells_lock);
- afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws);
+ afs_unuse_cell(old_root, afs_cell_trace_unuse_ws);
_leave(" = 0");
return 0;
}
@@ -500,39 +517,24 @@ static void afs_cell_destroy(struct rcu_head *rcu)
trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free);
afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers));
- afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias);
+ afs_unuse_cell(cell->alias_of, afs_cell_trace_unuse_alias);
key_put(cell->anonymous_key);
- kfree(cell->name);
+ idr_remove(&net->cells_dyn_ino, cell->dynroot_ino);
+ kfree(cell->name - 1);
kfree(cell);
afs_dec_cells_outstanding(net);
_leave(" [destroyed]");
}
-/*
- * Queue the cell manager.
- */
-static void afs_queue_cell_manager(struct afs_net *net)
-{
- int outstanding = atomic_inc_return(&net->cells_outstanding);
-
- _enter("%d", outstanding);
-
- if (!queue_work(afs_wq, &net->cells_manager))
- afs_dec_cells_outstanding(net);
-}
-
-/*
- * Cell management timer. We have an increment on cells_outstanding that we
- * need to pass along to the work item.
- */
-void afs_cells_timer(struct timer_list *timer)
+static void afs_destroy_cell_work(struct work_struct *work)
{
- struct afs_net *net = container_of(timer, struct afs_net, cells_timer);
+ struct afs_cell *cell = container_of(work, struct afs_cell, destroyer);
- _enter("");
- if (!queue_work(afs_wq, &net->cells_manager))
- afs_dec_cells_outstanding(net);
+ afs_see_cell(cell, afs_cell_trace_destroy);
+ timer_delete_sync(&cell->management_timer);
+ cancel_work_sync(&cell->manager);
+ call_rcu(&cell->rcu, afs_cell_destroy);
}
/*
@@ -564,7 +566,7 @@ void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
if (zero) {
a = atomic_read(&cell->active);
WARN(a != 0, "Cell active count %u > 0\n", a);
- call_rcu(&cell->rcu, afs_cell_destroy);
+ WARN_ON(!queue_work(afs_wq, &cell->destroyer));
}
}
}
@@ -576,10 +578,9 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
int r, a;
- r = refcount_read(&cell->ref);
- WARN_ON(r == 0);
+ __refcount_inc(&cell->ref, &r);
a = atomic_inc_return(&cell->active);
- trace_afs_cell(cell->debug_id, r, a, reason);
+ trace_afs_cell(cell->debug_id, r + 1, a, reason);
return cell;
}
@@ -587,10 +588,11 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
* Record a cell becoming less active. When the active counter reaches 1, it
* is scheduled for destruction, but may get reactivated.
*/
-void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason)
+void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
unsigned int debug_id;
time64_t now, expire_delay;
+ bool zero;
int r, a;
if (!cell)
@@ -605,13 +607,15 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
expire_delay = afs_cell_gc_delay;
debug_id = cell->debug_id;
- r = refcount_read(&cell->ref);
a = atomic_dec_return(&cell->active);
- trace_afs_cell(debug_id, r, a, reason);
- WARN_ON(a == 0);
- if (a == 1)
+ if (!a)
/* 'cell' may now be garbage collected. */
- afs_set_cell_timer(net, expire_delay);
+ afs_set_cell_timer(cell, expire_delay);
+
+ zero = __refcount_dec_and_test(&cell->ref, &r);
+ trace_afs_cell(debug_id, r - 1, a, reason);
+ if (zero)
+ WARN_ON(!queue_work(afs_wq, &cell->destroyer));
}
/*
@@ -631,9 +635,27 @@ void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason)
*/
void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
- afs_get_cell(cell, reason);
- if (!queue_work(afs_wq, &cell->manager))
- afs_put_cell(cell, afs_cell_trace_put_queue_fail);
+ queue_work(afs_wq, &cell->manager);
+}
+
+/*
+ * Cell-specific management timer.
+ */
+static void afs_cell_timer(struct timer_list *timer)
+{
+ struct afs_cell *cell = container_of(timer, struct afs_cell, management_timer);
+
+ afs_see_cell(cell, afs_cell_trace_see_mgmt_timer);
+ if (refcount_read(&cell->ref) > 0 && cell->net->live)
+ queue_work(afs_wq, &cell->manager);
+}
+
+/*
+ * Set/reduce the cell timer.
+ */
+void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs)
+{
+ timer_reduce(&cell->management_timer, jiffies + delay_secs * HZ);
}
/*
@@ -695,7 +717,6 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
if (cell->proc_link.next)
cell->proc_link.next->pprev = &cell->proc_link.next;
- afs_dynroot_mkdir(net, cell);
mutex_unlock(&net->proc_cells_lock);
return 0;
}
@@ -710,242 +731,164 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
afs_proc_cell_remove(cell);
mutex_lock(&net->proc_cells_lock);
- hlist_del_rcu(&cell->proc_link);
- afs_dynroot_rmdir(net, cell);
+ if (!hlist_unhashed(&cell->proc_link))
+ hlist_del_rcu(&cell->proc_link);
mutex_unlock(&net->proc_cells_lock);
_leave("");
}
+static bool afs_has_cell_expired(struct afs_cell *cell, time64_t *_next_manage)
+{
+ const struct afs_vlserver_list *vllist;
+ time64_t expire_at = cell->last_inactive;
+ time64_t now = ktime_get_real_seconds();
+
+ if (atomic_read(&cell->active))
+ return false;
+ if (!cell->net->live)
+ return true;
+
+ vllist = rcu_dereference_protected(cell->vl_servers, true);
+ if (vllist && vllist->nr_servers > 0)
+ expire_at += afs_cell_gc_delay;
+
+ if (expire_at <= now)
+ return true;
+ if (expire_at < *_next_manage)
+ *_next_manage = expire_at;
+ return false;
+}
+
/*
* Manage a cell record, initialising and destroying it, maintaining its DNS
* records.
*/
-static void afs_manage_cell(struct afs_cell *cell)
+static bool afs_manage_cell(struct afs_cell *cell)
{
struct afs_net *net = cell->net;
- int ret, active;
+ time64_t next_manage = TIME64_MAX;
+ int ret;
_enter("%s", cell->name);
-again:
_debug("state %u", cell->state);
switch (cell->state) {
- case AFS_CELL_INACTIVE:
- case AFS_CELL_FAILED:
- down_write(&net->cells_lock);
- active = 1;
- if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) {
- rb_erase(&cell->net_node, &net->cells);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0,
- afs_cell_trace_unuse_delete);
- smp_store_release(&cell->state, AFS_CELL_REMOVED);
- }
- up_write(&net->cells_lock);
- if (cell->state == AFS_CELL_REMOVED) {
- wake_up_var(&cell->state);
- goto final_destruction;
- }
- if (cell->state == AFS_CELL_FAILED)
- goto done;
- smp_store_release(&cell->state, AFS_CELL_UNSET);
- wake_up_var(&cell->state);
- goto again;
-
- case AFS_CELL_UNSET:
- smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
- wake_up_var(&cell->state);
- goto again;
-
- case AFS_CELL_ACTIVATING:
- ret = afs_activate_cell(net, cell);
- if (ret < 0)
- goto activation_failed;
+ case AFS_CELL_SETTING_UP:
+ goto set_up_cell;
+ case AFS_CELL_ACTIVE:
+ goto cell_is_active;
+ case AFS_CELL_REMOVING:
+ WARN_ON_ONCE(1);
+ return false;
+ case AFS_CELL_DEAD:
+ return false;
+ default:
+ _debug("bad state %u", cell->state);
+ WARN_ON_ONCE(1); /* Unhandled state */
+ return false;
+ }
+
+set_up_cell:
+ ret = afs_activate_cell(net, cell);
+ if (ret < 0) {
+ cell->error = ret;
+ goto remove_cell;
+ }
- smp_store_release(&cell->state, AFS_CELL_ACTIVE);
- wake_up_var(&cell->state);
- goto again;
+ afs_set_cell_state(cell, AFS_CELL_ACTIVE);
- case AFS_CELL_ACTIVE:
- if (atomic_read(&cell->active) > 1) {
- if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
- ret = afs_update_cell(cell);
- if (ret < 0)
- cell->error = ret;
- }
- goto done;
- }
- smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
- wake_up_var(&cell->state);
- goto again;
+cell_is_active:
+ if (afs_has_cell_expired(cell, &next_manage))
+ goto remove_cell;
- case AFS_CELL_DEACTIVATING:
- if (atomic_read(&cell->active) > 1)
- goto reverse_deactivation;
- afs_deactivate_cell(net, cell);
- smp_store_release(&cell->state, AFS_CELL_INACTIVE);
- wake_up_var(&cell->state);
- goto again;
+ if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
+ ret = afs_update_cell(cell);
+ if (ret < 0)
+ cell->error = ret;
+ }
- case AFS_CELL_REMOVED:
- goto done;
+ if (next_manage < TIME64_MAX && cell->net->live) {
+ time64_t now = ktime_get_real_seconds();
- default:
- break;
+ if (next_manage - now <= 0)
+ afs_queue_cell(cell, afs_cell_trace_queue_again);
+ else
+ afs_set_cell_timer(cell, next_manage - now);
}
- _debug("bad state %u", cell->state);
- BUG(); /* Unhandled state */
+ _leave(" [done %u]", cell->state);
+ return false;
-activation_failed:
- cell->error = ret;
- afs_deactivate_cell(net, cell);
+remove_cell:
+ down_write(&net->cells_lock);
- smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
- wake_up_var(&cell->state);
- goto again;
+ if (atomic_read(&cell->active)) {
+ up_write(&net->cells_lock);
+ goto cell_is_active;
+ }
-reverse_deactivation:
- smp_store_release(&cell->state, AFS_CELL_ACTIVE);
- wake_up_var(&cell->state);
- _leave(" [deact->act]");
- return;
+ /* Make sure that the expiring server records are going to see the fact
+ * that the cell is caput.
+ */
+ afs_set_cell_state(cell, AFS_CELL_REMOVING);
-done:
- _leave(" [done %u]", cell->state);
- return;
+ afs_deactivate_cell(net, cell);
+ afs_purge_servers(cell);
+
+ rb_erase(&cell->net_node, &net->cells);
+ afs_see_cell(cell, afs_cell_trace_unuse_delete);
+ up_write(&net->cells_lock);
-final_destruction:
/* The root volume is pinning the cell */
afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root);
cell->root_volume = NULL;
- afs_put_cell(cell, afs_cell_trace_put_destroy);
+
+ afs_set_cell_state(cell, AFS_CELL_DEAD);
+ return true;
}
static void afs_manage_cell_work(struct work_struct *work)
{
struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+ bool final_put;
- afs_manage_cell(cell);
- afs_put_cell(cell, afs_cell_trace_put_queue_work);
+ afs_see_cell(cell, afs_cell_trace_manage);
+ final_put = afs_manage_cell(cell);
+ afs_see_cell(cell, afs_cell_trace_managed);
+ if (final_put)
+ afs_put_cell(cell, afs_cell_trace_put_final);
}
/*
- * Manage the records of cells known to a network namespace. This includes
- * updating the DNS records and garbage collecting unused cells that were
- * automatically added.
- *
- * Note that constructed cell records may only be removed from net->cells by
- * this work item, so it is safe for this work item to stash a cursor pointing
- * into the tree and then return to caller (provided it skips cells that are
- * still under construction).
- *
- * Note also that we were given an increment on net->cells_outstanding by
- * whoever queued us that we need to deal with before returning.
+ * Purge in-memory cell database.
*/
-void afs_manage_cells(struct work_struct *work)
+void afs_cell_purge(struct afs_net *net)
{
- struct afs_net *net = container_of(work, struct afs_net, cells_manager);
+ struct afs_cell *ws;
struct rb_node *cursor;
- time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
- bool purging = !net->live;
_enter("");
- /* Trawl the cell database looking for cells that have expired from
- * lack of use and cells whose DNS results have expired and dispatch
- * their managers.
- */
- down_read(&net->cells_lock);
+ down_write(&net->cells_lock);
+ ws = rcu_replace_pointer(net->ws_cell, NULL,
+ lockdep_is_held(&net->cells_lock));
+ up_write(&net->cells_lock);
+ afs_unuse_cell(ws, afs_cell_trace_unuse_ws);
+ _debug("kick cells");
+ down_read(&net->cells_lock);
for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
- struct afs_cell *cell =
- rb_entry(cursor, struct afs_cell, net_node);
- unsigned active;
- bool sched_cell = false;
-
- active = atomic_read(&cell->active);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
- active, afs_cell_trace_manage);
-
- ASSERTCMP(active, >=, 1);
-
- if (purging) {
- if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) {
- active = atomic_dec_return(&cell->active);
- trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
- active, afs_cell_trace_unuse_pin);
- }
- }
+ struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node);
- if (active == 1) {
- struct afs_vlserver_list *vllist;
- time64_t expire_at = cell->last_inactive;
-
- read_lock(&cell->vl_servers_lock);
- vllist = rcu_dereference_protected(
- cell->vl_servers,
- lockdep_is_held(&cell->vl_servers_lock));
- if (vllist->nr_servers > 0)
- expire_at += afs_cell_gc_delay;
- read_unlock(&cell->vl_servers_lock);
- if (purging || expire_at <= now)
- sched_cell = true;
- else if (expire_at < next_manage)
- next_manage = expire_at;
- }
+ afs_see_cell(cell, afs_cell_trace_purge);
- if (!purging) {
- if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags))
- sched_cell = true;
- }
+ if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+ afs_unuse_cell(cell, afs_cell_trace_unuse_pin);
- if (sched_cell)
- afs_queue_cell(cell, afs_cell_trace_get_queue_manage);
+ afs_queue_cell(cell, afs_cell_trace_queue_purge);
}
-
up_read(&net->cells_lock);
- /* Update the timer on the way out. We have to pass an increment on
- * cells_outstanding in the namespace that we are in to the timer or
- * the work scheduler.
- */
- if (!purging && next_manage < TIME64_MAX) {
- now = ktime_get_real_seconds();
-
- if (next_manage - now <= 0) {
- if (queue_work(afs_wq, &net->cells_manager))
- atomic_inc(&net->cells_outstanding);
- } else {
- afs_set_cell_timer(net, next_manage - now);
- }
- }
-
- afs_dec_cells_outstanding(net);
- _leave(" [%d]", atomic_read(&net->cells_outstanding));
-}
-
-/*
- * Purge in-memory cell database.
- */
-void afs_cell_purge(struct afs_net *net)
-{
- struct afs_cell *ws;
-
- _enter("");
-
- down_write(&net->cells_lock);
- ws = net->ws_cell;
- net->ws_cell = NULL;
- up_write(&net->cells_lock);
- afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws);
-
- _debug("del timer");
- if (del_timer_sync(&net->cells_timer))
- atomic_dec(&net->cells_outstanding);
-
- _debug("kick mgr");
- afs_queue_cell_manager(net);
-
_debug("wait");
wait_var_event(&net->cells_outstanding,
!atomic_read(&net->cells_outstanding));
diff --git a/fs/afs/cm_security.c b/fs/afs/cm_security.c
new file mode 100644
index 000000000000..edcbd249d202
--- /dev/null
+++ b/fs/afs/cm_security.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Cache manager security.
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <crypto/krb5.h>
+#include "internal.h"
+#include "afs_cm.h"
+#include "afs_fs.h"
+#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c
+#define xdr_round_up(x) (round_up((x), sizeof(__be32)))
+#define xdr_len_object(x) (4 + round_up((x), sizeof(__be32)))
+
+#ifdef CONFIG_RXGK
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+ struct afs_server *server);
+#endif
+
+/*
+ * Respond to an RxGK challenge, adding appdata.
+ */
+static int afs_respond_to_challenge(struct sk_buff *challenge)
+{
+#ifdef CONFIG_RXGK
+ struct krb5_buffer appdata = {};
+ struct afs_server *server;
+#endif
+ struct rxrpc_peer *peer;
+ unsigned long peer_data;
+ u16 service_id;
+ u8 security_index;
+
+ rxrpc_kernel_query_challenge(challenge, &peer, &peer_data,
+ &service_id, &security_index);
+
+ _enter("%u,%u", service_id, security_index);
+
+ switch (service_id) {
+ /* We don't send CM_SERVICE RPCs, so don't expect a challenge
+ * therefrom.
+ */
+ case FS_SERVICE:
+ case VL_SERVICE:
+ case YFS_FS_SERVICE:
+ case YFS_VL_SERVICE:
+ break;
+ default:
+ pr_warn("Can't respond to unknown challenge %u:%u",
+ service_id, security_index);
+ return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+ afs_abort_unsupported_sec_class);
+ }
+
+ switch (security_index) {
+#ifdef CONFIG_RXKAD
+ case RXRPC_SECURITY_RXKAD:
+ return rxkad_kernel_respond_to_challenge(challenge);
+#endif
+
+#ifdef CONFIG_RXGK
+ case RXRPC_SECURITY_RXGK:
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+
+ case RXRPC_SECURITY_YFS_RXGK:
+ switch (service_id) {
+ case FS_SERVICE:
+ case YFS_FS_SERVICE:
+ server = (struct afs_server *)peer_data;
+ if (!server->cm_rxgk_appdata.data) {
+ mutex_lock(&server->cm_token_lock);
+ if (!server->cm_rxgk_appdata.data)
+ afs_create_yfs_cm_token(challenge, server);
+ mutex_unlock(&server->cm_token_lock);
+ }
+ if (server->cm_rxgk_appdata.data)
+ appdata = server->cm_rxgk_appdata;
+ break;
+ }
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+#endif
+
+ default:
+ return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+ afs_abort_unsupported_sec_class);
+ }
+}
+
+/*
+ * Process the OOB message queue, processing challenge packets.
+ */
+void afs_process_oob_queue(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, rx_oob_work);
+ struct sk_buff *oob;
+ enum rxrpc_oob_type type;
+
+ while ((oob = rxrpc_kernel_dequeue_oob(net->socket, &type))) {
+ switch (type) {
+ case RXRPC_OOB_CHALLENGE:
+ afs_respond_to_challenge(oob);
+ break;
+ }
+ rxrpc_kernel_free_oob(oob);
+ }
+}
+
+#ifdef CONFIG_RXGK
+/*
+ * Create a securities keyring for the cache manager and attach a key to it for
+ * the RxGK tokens we want to use to secure the callback connection back from
+ * the fileserver.
+ */
+int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+ const struct krb5_enctype *krb5;
+ struct key *ring;
+ key_ref_t key;
+ char K0[32], *desc;
+ int ret;
+
+ ring = keyring_alloc("kafs",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ KEY_POS_SEARCH | KEY_POS_WRITE |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(ring))
+ return PTR_ERR(ring);
+
+ ret = rxrpc_sock_set_security_keyring(socket->sk, ring);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOPKG;
+ krb5 = crypto_krb5_find_enctype(KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96);
+ if (!krb5)
+ goto out;
+
+ if (WARN_ON_ONCE(krb5->key_len > sizeof(K0)))
+ goto out;
+
+ ret = -ENOMEM;
+ desc = kasprintf(GFP_KERNEL, "%u:%u:%u:%u",
+ YFS_CM_SERVICE, RXRPC_SECURITY_YFS_RXGK, 1, krb5->etype);
+ if (!desc)
+ goto out;
+
+ wait_for_random_bytes();
+ get_random_bytes(K0, krb5->key_len);
+
+ key = key_create(make_key_ref(ring, true),
+ "rxrpc_s", desc,
+ K0, krb5->key_len,
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ kfree(desc);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto out;
+ }
+
+ net->fs_cm_token_key = key_ref_to_ptr(key);
+ ret = 0;
+out:
+ key_put(ring);
+ return ret;
+}
+
+/*
+ * Create an YFS RxGK GSS token to use as a ticket to the specified fileserver.
+ */
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+ struct afs_server *server)
+{
+ const struct krb5_enctype *conn_krb5, *token_krb5;
+ const struct krb5_buffer *token_key;
+ struct crypto_aead *aead;
+ struct scatterlist sg;
+ struct afs_net *net = server->cell->net;
+ const struct key *key = net->fs_cm_token_key;
+ size_t keysize, uuidsize, authsize, toksize, encsize, contsize, adatasize, offset;
+ __be32 caps[1] = {
+ [0] = htonl(AFS_CAP_ERROR_TRANSLATION),
+ };
+ __be32 *xdr;
+ void *appdata, *K0, *encbase;
+ u32 enctype;
+ int ret;
+
+ if (!key)
+ return -ENOKEY;
+
+ /* Assume that the fileserver is happy to use the same encoding type as
+ * we were told to use by the token obtained by the user.
+ */
+ enctype = rxgk_kernel_query_challenge(challenge);
+
+ conn_krb5 = crypto_krb5_find_enctype(enctype);
+ if (!conn_krb5)
+ return -ENOPKG;
+ token_krb5 = key->payload.data[0];
+ token_key = (const struct krb5_buffer *)&key->payload.data[2];
+
+ /* struct rxgk_key {
+ * afs_uint32 enctype;
+ * opaque key<>;
+ * };
+ */
+ keysize = 4 + xdr_len_object(conn_krb5->key_len);
+
+ /* struct RXGK_AuthName {
+ * afs_int32 kind;
+ * opaque data<AUTHDATAMAX>;
+ * opaque display<AUTHPRINTABLEMAX>;
+ * };
+ */
+ uuidsize = sizeof(server->uuid);
+ authsize = 4 + xdr_len_object(uuidsize) + xdr_len_object(0);
+
+ /* struct RXGK_Token {
+ * rxgk_key K0;
+ * RXGK_Level level;
+ * rxgkTime starttime;
+ * afs_int32 lifetime;
+ * afs_int32 bytelife;
+ * rxgkTime expirationtime;
+ * struct RXGK_AuthName identities<>;
+ * };
+ */
+ toksize = keysize + 8 + 4 + 4 + 8 + xdr_len_object(authsize);
+
+ offset = 0;
+ encsize = crypto_krb5_how_much_buffer(token_krb5, KRB5_ENCRYPT_MODE, toksize, &offset);
+
+ /* struct RXGK_TokenContainer {
+ * afs_int32 kvno;
+ * afs_int32 enctype;
+ * opaque encrypted_token<>;
+ * };
+ */
+ contsize = 4 + 4 + xdr_len_object(encsize);
+
+ /* struct YFSAppData {
+ * opr_uuid initiatorUuid;
+ * opr_uuid acceptorUuid;
+ * Capabilities caps;
+ * afs_int32 enctype;
+ * opaque callbackKey<>;
+ * opaque callbackToken<>;
+ * };
+ */
+ adatasize = 16 + 16 +
+ xdr_len_object(sizeof(caps)) +
+ 4 +
+ xdr_len_object(conn_krb5->key_len) +
+ xdr_len_object(contsize);
+
+ ret = -ENOMEM;
+ appdata = kzalloc(adatasize, GFP_KERNEL);
+ if (!appdata)
+ goto out;
+ xdr = appdata;
+
+ memcpy(xdr, &net->uuid, 16); /* appdata.initiatorUuid */
+ xdr += 16 / 4;
+ memcpy(xdr, &server->uuid, 16); /* appdata.acceptorUuid */
+ xdr += 16 / 4;
+ *xdr++ = htonl(ARRAY_SIZE(caps)); /* appdata.caps.len */
+ memcpy(xdr, &caps, sizeof(caps)); /* appdata.caps */
+ xdr += ARRAY_SIZE(caps);
+ *xdr++ = htonl(conn_krb5->etype); /* appdata.enctype */
+
+ *xdr++ = htonl(conn_krb5->key_len); /* appdata.callbackKey.len */
+ K0 = xdr;
+ get_random_bytes(K0, conn_krb5->key_len); /* appdata.callbackKey.data */
+ xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+ *xdr++ = htonl(contsize); /* appdata.callbackToken.len */
+ *xdr++ = htonl(1); /* cont.kvno */
+ *xdr++ = htonl(token_krb5->etype); /* cont.enctype */
+ *xdr++ = htonl(encsize); /* cont.encrypted_token.len */
+
+ encbase = xdr;
+ xdr += offset / 4;
+ *xdr++ = htonl(conn_krb5->etype); /* token.K0.enctype */
+ *xdr++ = htonl(conn_krb5->key_len); /* token.K0.key.len */
+ memcpy(xdr, K0, conn_krb5->key_len); /* token.K0.key.data */
+ xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+ *xdr++ = htonl(RXRPC_SECURITY_ENCRYPT); /* token.level */
+ *xdr++ = htonl(0); /* token.starttime */
+ *xdr++ = htonl(0); /* " */
+ *xdr++ = htonl(0); /* token.lifetime */
+ *xdr++ = htonl(0); /* token.bytelife */
+ *xdr++ = htonl(0); /* token.expirationtime */
+ *xdr++ = htonl(0); /* " */
+ *xdr++ = htonl(1); /* token.identities.count */
+ *xdr++ = htonl(0); /* token.identities[0].kind */
+ *xdr++ = htonl(uuidsize); /* token.identities[0].data.len */
+ memcpy(xdr, &server->uuid, uuidsize);
+ xdr += xdr_round_up(uuidsize) / 4;
+ *xdr++ = htonl(0); /* token.identities[0].display.len */
+
+ xdr = encbase + xdr_round_up(encsize);
+
+ if ((unsigned long)xdr - (unsigned long)appdata != adatasize)
+ pr_err("Appdata size incorrect %lx != %zx\n",
+ (unsigned long)xdr - (unsigned long)appdata, adatasize);
+
+ aead = crypto_krb5_prepare_encryption(token_krb5, token_key, RXGK_SERVER_ENC_TOKEN,
+ GFP_KERNEL);
+ if (IS_ERR(aead)) {
+ ret = PTR_ERR(aead);
+ goto out_token;
+ }
+
+ sg_init_one(&sg, encbase, encsize);
+ ret = crypto_krb5_encrypt(token_krb5, aead, &sg, 1, encsize, offset, toksize, false);
+ if (ret < 0)
+ goto out_aead;
+
+ server->cm_rxgk_appdata.len = adatasize;
+ server->cm_rxgk_appdata.data = appdata;
+ appdata = NULL;
+
+out_aead:
+ crypto_free_aead(aead);
+out_token:
+ kfree(appdata);
+out:
+ return ret;
+}
+#endif /* CONFIG_RXGK */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 99a3f20bc786..1a906805a9e3 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -139,49 +139,6 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
/*
- * Find the server record by peer address and record a probe to the cache
- * manager from a server.
- */
-static int afs_find_cm_server_by_peer(struct afs_call *call)
-{
- struct sockaddr_rxrpc srx;
- struct afs_server *server;
- struct rxrpc_peer *peer;
-
- peer = rxrpc_kernel_get_call_peer(call->net->socket, call->rxcall);
-
- server = afs_find_server(call->net, peer);
- if (!server) {
- trace_afs_cm_no_server(call, &srx);
- return 0;
- }
-
- call->server = server;
- return 0;
-}
-
-/*
- * Find the server record by server UUID and record a probe to the cache
- * manager from a server.
- */
-static int afs_find_cm_server_by_uuid(struct afs_call *call,
- struct afs_uuid *uuid)
-{
- struct afs_server *server;
-
- rcu_read_lock();
- server = afs_find_server_by_uuid(call->net, call->request);
- rcu_read_unlock();
- if (!server) {
- trace_afs_cm_no_server_u(call, call->request);
- return 0;
- }
-
- call->server = server;
- return 0;
-}
-
-/*
* Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
@@ -322,10 +279,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- /* we'll need the file server record as that tells us which set of
- * vnodes to operate upon */
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -349,18 +303,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
*/
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
- int ret;
-
_enter("");
afs_extract_discard(call, 0);
- ret = afs_extract_data(call, false);
- if (ret < 0)
- return ret;
-
- /* we'll need the file server record as that tells us which set of
- * vnodes to operate upon */
- return afs_find_cm_server_by_peer(call);
+ return afs_extract_data(call, false);
}
/*
@@ -373,8 +319,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
__be32 *b;
int ret;
- _enter("");
-
_enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
@@ -421,9 +365,13 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- /* we'll need the file server record as that tells us which set of
- * vnodes to operate upon */
- return afs_find_cm_server_by_uuid(call, call->request);
+ if (memcmp(call->request, &call->server->_uuid, sizeof(call->server->_uuid)) != 0) {
+ pr_notice("Callback UUID does not match fileserver UUID\n");
+ trace_afs_cm_no_server_u(call, call->request);
+ return 0;
+ }
+
+ return 0;
}
/*
@@ -455,7 +403,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -533,7 +481,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -593,7 +541,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
/*
@@ -667,9 +615,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- /* We'll need the file server record as that tells us which set of
- * vnodes to operate upon.
- */
- return afs_find_cm_server_by_peer(call);
+ return 0;
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 67afe68972d5..bfb69e066672 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -12,6 +12,8 @@
#include <linux/swap.h>
#include <linux/ctype.h>
#include <linux/sched.h>
+#include <linux/iversion.h>
+#include <linux/iov_iter.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
#include "afs_fs.h"
@@ -21,7 +23,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
static int afs_dir_open(struct inode *inode, struct file *file);
static int afs_readdir(struct file *file, struct dir_context *ctx);
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
+static int afs_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags);
static int afs_d_delete(const struct dentry *dentry);
static void afs_d_iput(struct dentry *dentry, struct inode *inode);
static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
@@ -30,8 +33,8 @@ static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nl
loff_t fpos, u64 ino, unsigned dtype);
static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl);
-static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode);
+static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode);
static int afs_rmdir(struct inode *dir, struct dentry *dentry);
static int afs_unlink(struct inode *dir, struct dentry *dentry);
static int afs_link(struct dentry *from, struct inode *dir,
@@ -41,15 +44,6 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
-static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags);
-static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- size_t length);
-
-static bool afs_dir_dirty_folio(struct address_space *mapping,
- struct folio *folio)
-{
- BUG(); /* This should never happen. */
-}
const struct file_operations afs_dir_file_operations = {
.open = afs_dir_open,
@@ -74,10 +68,7 @@ const struct inode_operations afs_dir_inode_operations = {
};
const struct address_space_operations afs_dir_aops = {
- .dirty_folio = afs_dir_dirty_folio,
- .release_folio = afs_dir_release_folio,
- .invalidate_folio = afs_dir_invalidate_folio,
- .migrate_folio = filemap_migrate_folio,
+ .writepages = afs_single_writepages,
};
const struct dentry_operations afs_fs_dentry_operations = {
@@ -98,152 +89,124 @@ struct afs_lookup_one_cookie {
struct afs_lookup_cookie {
struct dir_context ctx;
struct qstr name;
- bool found;
- bool one_only;
unsigned short nr_fids;
struct afs_fid fids[50];
};
+static void afs_dir_unuse_cookie(struct afs_vnode *dvnode, int ret)
+{
+ if (ret == 0) {
+ struct afs_vnode_cache_aux aux;
+ loff_t i_size = i_size_read(&dvnode->netfs.inode);
+
+ afs_set_cache_aux(dvnode, &aux);
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), &aux, &i_size);
+ } else {
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ }
+}
+
/*
- * Drop the refs that we're holding on the folios we were reading into. We've
- * got refs on the first nr_pages pages.
+ * Iterate through a kmapped directory segment, dumping a summary of
+ * the contents.
*/
-static void afs_dir_read_cleanup(struct afs_read *req)
+static size_t afs_dir_dump_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- struct address_space *mapping = req->vnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
+ do {
+ union afs_xdr_dir_block *block = iter_base;
- XA_STATE(xas, &mapping->i_pages, 0);
+ pr_warn("[%05zx] %32phN\n", progress, block);
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ progress += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
- if (unlikely(!req->nr_pages))
- return;
+ return len;
+}
- rcu_read_lock();
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
- BUG_ON(xa_is_value(folio));
- ASSERTCMP(folio->mapping, ==, mapping);
+/*
+ * Dump the contents of a directory.
+ */
+static void afs_dir_dump(struct afs_vnode *dvnode)
+{
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
- folio_put(folio);
- }
+ pr_warn("DIR %llx:%llx is=%llx\n",
+ dvnode->fid.vid, dvnode->fid.vnode, i_size);
- rcu_read_unlock();
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ iterate_folioq(&iter, iov_iter_count(&iter), NULL, NULL,
+ afs_dir_dump_step);
}
/*
* check that a directory folio is valid
*/
-static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
- loff_t i_size)
+static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress,
+ union afs_xdr_dir_block *block)
{
- union afs_xdr_dir_block *block;
- size_t offset, size;
- loff_t pos;
+ if (block->hdr.magic != AFS_DIR_MAGIC) {
+ pr_warn("%s(%lx): [%zx] bad magic %04x\n",
+ __func__, dvnode->netfs.inode.i_ino,
+ progress, ntohs(block->hdr.magic));
+ trace_afs_dir_check_failed(dvnode, progress);
+ trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
+ return false;
+ }
- /* Determine how many magic numbers there should be in this folio, but
- * we must take care because the directory may change size under us.
+ /* Make sure each block is NUL terminated so we can reasonably
+ * use string functions on it. The filenames in the folio
+ * *should* be NUL-terminated anyway.
*/
- pos = folio_pos(folio);
- if (i_size <= pos)
- goto checked;
-
- size = min_t(loff_t, folio_size(folio), i_size - pos);
- for (offset = 0; offset < size; offset += sizeof(*block)) {
- block = kmap_local_folio(folio, offset);
- if (block->hdr.magic != AFS_DIR_MAGIC) {
- printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
- __func__, dvnode->netfs.inode.i_ino,
- pos, offset, size, ntohs(block->hdr.magic));
- trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
- kunmap_local(block);
- trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
- goto error;
- }
-
- /* Make sure each block is NUL terminated so we can reasonably
- * use string functions on it. The filenames in the folio
- * *should* be NUL-terminated anyway.
- */
- ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
-
- kunmap_local(block);
- }
-checked:
+ ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
afs_stat_v(dvnode, n_read_dir);
return true;
-
-error:
- return false;
}
/*
- * Dump the contents of a directory.
+ * Iterate through a kmapped directory segment, checking the content.
*/
-static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
+static size_t afs_dir_check_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- union afs_xdr_dir_block *block;
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
- size_t offset, size;
-
- XA_STATE(xas, &mapping->i_pages, 0);
-
- pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
- dvnode->fid.vid, dvnode->fid.vnode,
- req->file_size, req->len, req->actual_len);
- pr_warn("DIR %llx %x %zx %zx\n",
- req->pos, req->nr_pages,
- req->iter->iov_offset, iov_iter_count(req->iter));
-
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
+ struct afs_vnode *dvnode = priv;
- BUG_ON(folio->mapping != mapping);
+ if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ len % AFS_DIR_BLOCK_SIZE))
+ return len;
- size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
- for (offset = 0; offset < size; offset += sizeof(*block)) {
- block = kmap_local_folio(folio, offset);
- pr_warn("[%02lx] %32phN\n", folio->index + offset, block);
- kunmap_local(block);
- }
- }
+ do {
+ if (!afs_dir_check_block(dvnode, progress, iter_base))
+ break;
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
+
+ return len;
}
/*
- * Check all the blocks in a directory. All the folios are held pinned.
+ * Check all the blocks in a directory.
*/
-static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+static int afs_dir_check(struct afs_vnode *dvnode)
{
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
- int ret = 0;
-
- XA_STATE(xas, &mapping->i_pages, 0);
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
+ size_t checked = 0;
- if (unlikely(!req->nr_pages))
+ if (unlikely(!i_size))
return 0;
- rcu_read_lock();
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
-
- BUG_ON(folio->mapping != mapping);
-
- if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
- afs_dir_dump(dvnode, req);
- ret = -EIO;
- break;
- }
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ checked = iterate_folioq(&iter, iov_iter_count(&iter), dvnode, NULL,
+ afs_dir_check_step);
+ if (checked != i_size) {
+ afs_dir_dump(dvnode);
+ return -EIO;
}
-
- rcu_read_unlock();
- return ret;
+ return 0;
}
/*
@@ -263,134 +226,140 @@ static int afs_dir_open(struct inode *inode, struct file *file)
}
/*
- * Read the directory into the pagecache in one go, scrubbing the previous
- * contents. The list of folios is returned, pinning them so that they don't
- * get reclaimed during the iteration.
+ * Read a file in a single download.
*/
-static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
- __acquires(&dvnode->validate_lock)
+static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
{
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct afs_read *req;
+ struct iov_iter iter;
+ ssize_t ret;
loff_t i_size;
- int nr_pages, i;
- int ret;
- loff_t remote_size = 0;
-
- _enter("");
-
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- return ERR_PTR(-ENOMEM);
+ bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
- refcount_set(&req->usage, 1);
- req->vnode = dvnode;
- req->key = key_get(key);
- req->cleanup = afs_dir_read_cleanup;
-
-expand:
i_size = i_size_read(&dvnode->netfs.inode);
- if (i_size < remote_size)
- i_size = remote_size;
- if (i_size < 2048) {
- ret = afs_bad(dvnode, afs_file_error_dir_small);
- goto error;
- }
- if (i_size > 2048 * 1024) {
- trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- ret = -EFBIG;
- goto error;
+ if (is_dir) {
+ if (i_size < AFS_DIR_BLOCK_SIZE)
+ return afs_bad(dvnode, afs_file_error_dir_small);
+ if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
+ } else {
+ if (i_size > AFSPATHMAX) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
}
- _enter("%llu", i_size);
+ /* Expand the storage. TODO: Shrink the storage too. */
+ if (dvnode->directory_size < i_size) {
+ size_t cur_size = dvnode->directory_size;
- nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
+ ret = netfs_alloc_folioq_buffer(NULL,
+ &dvnode->directory, &cur_size, i_size,
+ mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ dvnode->directory_size = cur_size;
+ if (ret < 0)
+ return ret;
+ }
- req->actual_len = i_size; /* May change */
- req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
- req->data_version = dvnode->status.data_version; /* May change */
- iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages,
- 0, i_size);
- req->iter = &req->def_iter;
+ iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, dvnode->directory_size);
- /* Fill in any gaps that we might find where the memory reclaimer has
- * been at work and pin all the folios. If there are any gaps, we will
- * need to reread the entire directory contents.
+ /* AFS requires us to perform the read of a directory synchronously as
+ * a single unit to avoid issues with the directory contents being
+ * changed between reads.
*/
- i = req->nr_pages;
- while (i < nr_pages) {
- struct folio *folio;
-
- folio = filemap_get_folio(mapping, i);
- if (IS_ERR(folio)) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_inval);
- folio = __filemap_get_folio(mapping,
- i, FGP_LOCK | FGP_CREAT,
- mapping->gfp_mask);
- if (IS_ERR(folio)) {
- ret = PTR_ERR(folio);
- goto error;
- }
- folio_attach_private(folio, (void *)1);
- folio_unlock(folio);
+ ret = netfs_read_single(&dvnode->netfs.inode, file, &iter);
+ if (ret >= 0) {
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size > ret) {
+ /* The content has grown, so we need to expand the
+ * buffer.
+ */
+ ret = -ESTALE;
+ } else if (is_dir) {
+ int ret2 = afs_dir_check(dvnode);
+
+ if (ret2 < 0)
+ ret = ret2;
+ } else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
+ /* NUL-terminate a symlink. */
+ char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
+
+ symlink[i_size] = 0;
+ kunmap_local(symlink);
}
-
- req->nr_pages += folio_nr_pages(folio);
- i += folio_nr_pages(folio);
}
- /* If we're going to reload, we need to lock all the pages to prevent
- * races.
- */
+ return ret;
+}
+
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
+{
+ ssize_t ret;
+
+ fscache_use_cookie(afs_vnode_cache(dvnode), false);
+ ret = afs_do_read_single(dvnode, file);
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ return ret;
+}
+
+/*
+ * Read the directory into a folio_queue buffer in one go, scrubbing the
+ * previous contents. We return -ESTALE if the caller needs to call us again.
+ */
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ __acquires(&dvnode->validate_lock)
+{
+ ssize_t ret;
+ loff_t i_size;
+
+ i_size = i_size_read(&dvnode->netfs.inode);
+
ret = -ERESTARTSYS;
if (down_read_killable(&dvnode->validate_lock) < 0)
goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ /* We only need to reread the data if it became invalid - or if we
+ * haven't read it yet.
+ */
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ ret = i_size;
+ goto valid;
+ }
up_read(&dvnode->validate_lock);
if (down_write_killable(&dvnode->validate_lock) < 0)
goto error;
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- trace_afs_reload_dir(dvnode);
- ret = afs_fetch_data(dvnode, req);
- if (ret < 0)
- goto error_unlock;
-
- task_io_account_read(PAGE_SIZE * req->nr_pages);
-
- if (req->len < req->file_size) {
- /* The content has grown, so we need to expand the
- * buffer.
- */
- up_write(&dvnode->validate_lock);
- remote_size = req->file_size;
- goto expand;
- }
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_invalidate_cache(dvnode, 0);
- /* Validate the data we just read. */
- ret = afs_dir_check(dvnode, req);
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) ||
+ !test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ trace_afs_reload_dir(dvnode);
+ ret = afs_read_single(dvnode, file);
if (ret < 0)
goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+ } else {
+ ret = i_size;
}
downgrade_write(&dvnode->validate_lock);
-success:
- return req;
+valid:
+ return ret;
error_unlock:
up_write(&dvnode->validate_lock);
error:
- afs_put_read(req);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
+ _leave(" = %zd", ret);
+ return ret;
}
/*
@@ -398,79 +367,69 @@ error:
*/
static int afs_dir_iterate_block(struct afs_vnode *dvnode,
struct dir_context *ctx,
- union afs_xdr_dir_block *block,
- unsigned blkoff)
+ union afs_xdr_dir_block *block)
{
union afs_xdr_dirent *dire;
- unsigned offset, next, curr, nr_slots;
+ unsigned int blknum, base, hdr, pos, next, nr_slots;
size_t nlen;
int tmp;
- _enter("%llx,%x", ctx->pos, blkoff);
+ blknum = ctx->pos / AFS_DIR_BLOCK_SIZE;
+ base = blknum * AFS_DIR_SLOTS_PER_BLOCK;
+ hdr = (blknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+ pos = DIV_ROUND_UP(ctx->pos, AFS_DIR_DIRENT_SIZE) - base;
- curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
+ _enter("%llx,%x", ctx->pos, blknum);
/* walk through the block, an entry at a time */
- for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
- offset < AFS_DIR_SLOTS_PER_BLOCK;
- offset = next
- ) {
+ for (unsigned int slot = hdr; slot < AFS_DIR_SLOTS_PER_BLOCK; slot = next) {
/* skip entries marked unused in the bitmap */
- if (!(block->hdr.bitmap[offset / 8] &
- (1 << (offset % 8)))) {
- _debug("ENT[%zu.%u]: unused",
- blkoff / sizeof(union afs_xdr_dir_block), offset);
- next = offset + 1;
- if (offset >= curr)
- ctx->pos = blkoff +
- next * sizeof(union afs_xdr_dirent);
+ if (!(block->hdr.bitmap[slot / 8] &
+ (1 << (slot % 8)))) {
+ _debug("ENT[%x]: Unused", base + slot);
+ next = slot + 1;
+ if (next >= pos)
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
continue;
}
/* got a valid entry */
- dire = &block->dirents[offset];
+ dire = &block->dirents[slot];
nlen = strnlen(dire->u.name,
- sizeof(*block) -
- offset * sizeof(union afs_xdr_dirent));
+ (unsigned long)(block + 1) - (unsigned long)dire->u.name - 1);
if (nlen > AFSNAMEMAX - 1) {
- _debug("ENT[%zu]: name too long (len %u/%zu)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, nlen);
+ _debug("ENT[%x]: Name too long (len %zx)",
+ base + slot, nlen);
return afs_bad(dvnode, afs_file_error_dir_name_too_long);
}
- _debug("ENT[%zu.%u]: %s %zu \"%s\"",
- blkoff / sizeof(union afs_xdr_dir_block), offset,
- (offset < curr ? "skip" : "fill"),
+ _debug("ENT[%x]: %s %zx \"%s\"",
+ base + slot, (slot < pos ? "skip" : "fill"),
nlen, dire->u.name);
nr_slots = afs_dir_calc_slots(nlen);
- next = offset + nr_slots;
+ next = slot + nr_slots;
if (next > AFS_DIR_SLOTS_PER_BLOCK) {
- _debug("ENT[%zu.%u]:"
- " %u extends beyond end dir block"
- " (len %zu)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, next, nlen);
+ _debug("ENT[%x]: extends beyond end dir block (len %zx)",
+ base + slot, nlen);
return afs_bad(dvnode, afs_file_error_dir_over_end);
}
/* Check that the name-extension dirents are all allocated */
for (tmp = 1; tmp < nr_slots; tmp++) {
- unsigned int ix = offset + tmp;
- if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) {
- _debug("ENT[%zu.u]:"
- " %u unmarked extension (%u/%u)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, tmp, nr_slots);
+ unsigned int xslot = slot + tmp;
+
+ if (!(block->hdr.bitmap[xslot / 8] & (1 << (xslot % 8)))) {
+ _debug("ENT[%x]: Unmarked extension (%x/%x)",
+ base + slot, tmp, nr_slots);
return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
}
}
/* skip if starts before the current position */
- if (offset < curr) {
- if (next > curr)
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ if (slot < pos) {
+ if (next > pos)
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
continue;
}
@@ -484,75 +443,110 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
return 0;
}
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
}
_leave(" = 1 [more]");
return 1;
}
+struct afs_dir_iteration_ctx {
+ struct dir_context *dir_ctx;
+ int error;
+};
+
/*
- * iterate through the data blob that lists the contents of an AFS directory
+ * Iterate through a kmapped directory segment.
*/
-static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
- struct key *key, afs_dataversion_t *_dir_version)
+static size_t afs_dir_iterate_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- struct afs_vnode *dvnode = AFS_FS_I(dir);
- union afs_xdr_dir_block *dblock;
- struct afs_read *req;
- struct folio *folio;
- unsigned offset, size;
+ struct afs_dir_iteration_ctx *ctx = priv2;
+ struct afs_vnode *dvnode = priv;
int ret;
- _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
-
- if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
- _leave(" = -ESTALE");
- return -ESTALE;
+ if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ len % AFS_DIR_BLOCK_SIZE)) {
+ pr_err("Mis-iteration prog=%zx len=%zx\n",
+ progress % AFS_DIR_BLOCK_SIZE,
+ len % AFS_DIR_BLOCK_SIZE);
+ return len;
}
- req = afs_read_dir(dvnode, key);
- if (IS_ERR(req))
- return PTR_ERR(req);
- *_dir_version = req->data_version;
+ do {
+ ret = afs_dir_iterate_block(dvnode, ctx->dir_ctx, iter_base);
+ if (ret != 1)
+ break;
- /* round the file position up to the next entry boundary */
- ctx->pos += sizeof(union afs_xdr_dirent) - 1;
- ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1);
+ ctx->dir_ctx->pos = round_up(ctx->dir_ctx->pos, AFS_DIR_BLOCK_SIZE);
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
- /* walk through the blocks in sequence */
- ret = 0;
- while (ctx->pos < req->actual_len) {
- /* Fetch the appropriate folio from the directory and re-add it
- * to the LRU. We have all the pages pinned with an extra ref.
- */
- folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
- FGP_ACCESSED, 0);
- if (IS_ERR(folio)) {
- ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
- break;
- }
+ return len;
+}
- offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
- size = min_t(loff_t, folio_size(folio),
- req->actual_len - folio_file_pos(folio));
+/*
+ * Iterate through the directory folios.
+ */
+static int afs_dir_iterate_contents(struct inode *dir, struct dir_context *dir_ctx)
+{
+ struct afs_dir_iteration_ctx ctx = { .dir_ctx = dir_ctx };
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(dir);
- do {
- dblock = kmap_local_folio(folio, offset);
- ret = afs_dir_iterate_block(dvnode, ctx, dblock,
- folio_file_pos(folio) + offset);
- kunmap_local(dblock);
- if (ret != 1)
- goto out;
+ /* Round the file position up to the next entry boundary */
+ dir_ctx->pos = round_up(dir_ctx->pos, sizeof(union afs_xdr_dirent));
- } while (offset += sizeof(*dblock), offset < size);
+ if (i_size <= 0 || dir_ctx->pos >= i_size)
+ return 0;
- ret = 0;
- }
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ iov_iter_advance(&iter, round_down(dir_ctx->pos, AFS_DIR_BLOCK_SIZE));
+
+ iterate_folioq(&iter, iov_iter_count(&iter), dvnode, &ctx,
+ afs_dir_iterate_step);
+
+ if (ctx.error == -ESTALE)
+ afs_invalidate_dir(dvnode, afs_dir_invalid_iter_stale);
+ return ctx.error;
+}
+
+/*
+ * iterate through the data blob that lists the contents of an AFS directory
+ */
+static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
+ struct file *file, afs_dataversion_t *_dir_version)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int retry_limit = 100;
+ int ret;
+
+ _enter("{%lu},%llx,,", dir->i_ino, ctx->pos);
+
+ do {
+ if (--retry_limit < 0) {
+ pr_warn("afs_read_dir(): Too many retries\n");
+ ret = -ESTALE;
+ break;
+ }
+ ret = afs_read_dir(dvnode, file);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ break;
+ if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+ ret = -ESTALE;
+ break;
+ }
+ continue;
+ }
+ *_dir_version = inode_peek_iversion_raw(dir);
+
+ ret = afs_dir_iterate_contents(dir, ctx);
+ up_read(&dvnode->validate_lock);
+ } while (ret == -ESTALE);
-out:
- up_read(&dvnode->validate_lock);
- afs_put_read(req);
_leave(" = %d", ret);
return ret;
}
@@ -564,8 +558,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
{
afs_dataversion_t dir_version;
- return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
- &dir_version);
+ return afs_dir_iterate(file_inode(file), ctx, file, &dir_version);
}
/*
@@ -605,22 +598,22 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
* Do a lookup of a single name in a directory
* - just returns the FID the dentry name maps to if found
*/
-static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
- struct afs_fid *fid, struct key *key,
+static int afs_do_lookup_one(struct inode *dir, const struct qstr *name,
+ struct afs_fid *fid,
afs_dataversion_t *_dir_version)
{
struct afs_super_info *as = dir->i_sb->s_fs_info;
struct afs_lookup_one_cookie cookie = {
.ctx.actor = afs_lookup_one_filldir,
- .name = dentry->d_name,
+ .name = *name,
.fid.vid = as->volume->vid
};
int ret;
- _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+ _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name);
/* search the directory */
- ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
+ ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
if (ret < 0) {
_leave(" = %d [iter]", ret);
return ret;
@@ -655,19 +648,10 @@ static bool afs_lookup_filldir(struct dir_context *ctx, const char *name,
BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
- if (cookie->found) {
- if (cookie->nr_fids < 50) {
- cookie->fids[cookie->nr_fids].vnode = ino;
- cookie->fids[cookie->nr_fids].unique = dtype;
- cookie->nr_fids++;
- }
- } else if (cookie->name.len == nlen &&
- memcmp(cookie->name.name, name, nlen) == 0) {
- cookie->fids[1].vnode = ino;
- cookie->fids[1].unique = dtype;
- cookie->found = 1;
- if (cookie->one_only)
- return false;
+ if (cookie->nr_fids < 50) {
+ cookie->fids[cookie->nr_fids].vnode = ino;
+ cookie->fids[cookie->nr_fids].unique = dtype;
+ cookie->nr_fids++;
}
return cookie->nr_fids < 50;
@@ -787,8 +771,7 @@ static bool afs_server_supports_ibulk(struct afs_vnode *dvnode)
* files in one go and create inodes for them. The inode of the file we were
* asked for is returned.
*/
-static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
- struct key *key)
+static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
{
struct afs_lookup_cookie *cookie;
struct afs_vnode_param *vp;
@@ -796,6 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct inode *inode = NULL, *ti;
afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
+ bool supports_ibulk;
long ret;
int i;
@@ -812,19 +796,19 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want
* and slot 0 for the directory */
- if (!afs_server_supports_ibulk(dvnode))
- cookie->one_only = true;
-
- /* search the directory */
- ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
+ /* Search the directory for the named entry using the hash table... */
+ ret = afs_dir_search(dvnode, &dentry->d_name, &cookie->fids[1], &data_version);
if (ret < 0)
goto out;
- dentry->d_fsdata = (void *)(unsigned long)data_version;
+ supports_ibulk = afs_server_supports_ibulk(dvnode);
+ if (supports_ibulk) {
+ /* ...then scan linearly from that point for entries to lookup-ahead. */
+ cookie->ctx.pos = (ret + 1) * AFS_DIR_DIRENT_SIZE;
+ afs_dir_iterate(dir, &cookie->ctx, NULL, &data_version);
+ }
- ret = -ENOENT;
- if (!cookie->found)
- goto out;
+ dentry->d_fsdata = (void *)(unsigned long)data_version;
/* Check to see if we already have an inode for the primary fid. */
inode = ilookup5(dir->i_sb, cookie->fids[1].vnode,
@@ -883,7 +867,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
* the whole operation.
*/
afs_op_set_error(op, -ENOTSUPP);
- if (!cookie->one_only) {
+ if (supports_ibulk) {
op->ops = &afs_inline_bulk_status_operation;
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
@@ -925,8 +909,7 @@ out:
/*
* Look up an entry in a directory with @sys substitution.
*/
-static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
- struct key *key)
+static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry)
{
struct afs_sysnames *subs;
struct afs_net *net = afs_i2net(dir);
@@ -960,7 +943,7 @@ static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
}
strcpy(p, name);
- ret = lookup_one_len(buf, dentry->d_parent, len);
+ ret = lookup_noperm(&QSTR(buf), dentry->d_parent);
if (IS_ERR(ret) || d_is_positive(ret))
goto out_s;
dput(ret);
@@ -974,7 +957,6 @@ out_s:
afs_put_sysnames(subs);
kfree(buf);
out_p:
- key_put(key);
return ret;
}
@@ -988,7 +970,6 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
struct afs_fid fid = {};
struct inode *inode;
struct dentry *d;
- struct key *key;
int ret;
_enter("{%llx:%llu},%p{%pd},",
@@ -1006,15 +987,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ESTALE);
}
- key = afs_request_key(dvnode->volume->cell);
- if (IS_ERR(key)) {
- _leave(" = %ld [key]", PTR_ERR(key));
- return ERR_CAST(key);
- }
-
- ret = afs_validate(dvnode, key);
+ ret = afs_validate(dvnode, NULL);
if (ret < 0) {
- key_put(key);
+ afs_dir_unuse_cookie(dvnode, ret);
_leave(" = %d [val]", ret);
return ERR_PTR(ret);
}
@@ -1024,15 +999,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
dentry->d_name.name[dentry->d_name.len - 1] == 's')
- return afs_lookup_atsys(dir, dentry, key);
+ return afs_lookup_atsys(dir, dentry);
afs_stat_v(dvnode, n_lookup);
- inode = afs_do_lookup(dir, dentry, key);
- key_put(key);
+ inode = afs_do_lookup(dir, dentry);
if (inode == ERR_PTR(-ENOENT))
- inode = afs_try_auto_mntpt(dentry, dir);
-
- if (!IS_ERR_OR_NULL(inode))
+ inode = NULL;
+ else if (!IS_ERR_OR_NULL(inode))
fid = AFS_FS_I(inode)->fid;
_debug("splice %p", dentry->d_inode);
@@ -1050,21 +1023,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
/*
* Check the validity of a dentry under RCU conditions.
*/
-static int afs_d_revalidate_rcu(struct dentry *dentry)
+static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry)
{
- struct afs_vnode *dvnode;
- struct dentry *parent;
- struct inode *dir;
long dir_version, de_version;
_enter("%p", dentry);
- /* Check the parent directory is still valid first. */
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- dvnode = AFS_FS_I(dir);
if (test_bit(AFS_VNODE_DELETED, &dvnode->flags))
return -ECHILD;
@@ -1092,11 +1056,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry)
* - NOTE! the hit can be a negative hit too, so we can't assume we have an
* inode
*/
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- struct afs_vnode *vnode, *dir;
+ struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir);
struct afs_fid fid;
- struct dentry *parent;
struct inode *inode;
struct key *key;
afs_dataversion_t dir_version, invalid_before;
@@ -1104,7 +1068,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
int ret;
if (flags & LOOKUP_RCU)
- return afs_d_revalidate_rcu(dentry);
+ return afs_d_revalidate_rcu(dir, dentry);
if (d_really_is_positive(dentry)) {
vnode = AFS_FS_I(d_inode(dentry));
@@ -1119,14 +1083,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (IS_ERR(key))
key = NULL;
- /* Hold the parent dentry so we can peer at it */
- parent = dget_parent(dentry);
- dir = AFS_FS_I(d_inode(parent));
-
/* validate the parent directory */
ret = afs_validate(dir, key);
if (ret == -ERESTARTSYS) {
- dput(parent);
key_put(key);
return ret;
}
@@ -1154,7 +1113,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
afs_stat_v(dir, n_reval);
/* search the directory for this vnode */
- ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version);
+ ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version);
switch (ret) {
case 0:
/* the filename maps to something */
@@ -1198,22 +1157,19 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out_valid;
default:
- _debug("failed to iterate dir %pd: %d",
- parent, ret);
+ _debug("failed to iterate parent %pd2: %d", dentry, ret);
goto not_found;
}
out_valid:
dentry->d_fsdata = (void *)(unsigned long)dir_version;
out_valid_noupdate:
- dput(parent);
key_put(key);
_leave(" = 1 [valid]");
return 1;
not_found:
_debug("dropping dentry %pd2", dentry);
- dput(parent);
key_put(key);
_leave(" = 0 [bad]");
@@ -1281,6 +1237,7 @@ void afs_check_for_remote_deletion(struct afs_operation *op)
*/
static void afs_vnode_new_inode(struct afs_operation *op)
{
+ struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode_param *vp = &op->file[1];
struct afs_vnode *vnode;
struct inode *inode;
@@ -1300,6 +1257,10 @@ static void afs_vnode_new_inode(struct afs_operation *op)
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ if (S_ISDIR(inode->i_mode))
+ afs_mkdir_init_dir(vnode, dvp->vnode);
+ else if (S_ISLNK(inode->i_mode))
+ afs_init_new_symlink(vnode, op);
if (!afs_op_error(op))
afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
d_instantiate(op->dentry, inode);
@@ -1316,18 +1277,21 @@ static void afs_create_success(struct afs_operation *op)
static void afs_create_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode_param *vp = &op->file[1];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid,
op->create.reason);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_create_put(struct afs_operation *op)
@@ -1350,11 +1314,12 @@ static const struct afs_operation_ops afs_mkdir_operation = {
/*
* create a directory on an AFS filesystem
*/
-static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct afs_operation *op;
struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int ret;
_enter("{%llx:%llu},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@ -1362,9 +1327,11 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
op = afs_alloc_operation(NULL, dvnode->volume);
if (IS_ERR(op)) {
d_drop(dentry);
- return PTR_ERR(op);
+ return ERR_CAST(op);
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1374,7 +1341,9 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
op->create.reason = afs_edit_dir_for_mkdir;
op->mtime = current_time(dir);
op->ops = &afs_mkdir_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ERR_PTR(ret);
}
/*
@@ -1387,8 +1356,8 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
clear_nlink(&vnode->netfs.inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_rmdir);
+ afs_invalidate_dir(vnode, afs_dir_invalid_subdir_removed);
}
}
@@ -1402,18 +1371,21 @@ static void afs_rmdir_success(struct afs_operation *op)
static void afs_rmdir_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
afs_dir_remove_subdir(op->dentry);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_remove(dvnode, &op->dentry->d_name,
afs_edit_dir_for_rmdir);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_rmdir_put(struct afs_operation *op)
@@ -1448,6 +1420,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1471,10 +1445,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
op->file[1].vnode = vnode;
}
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+
+ /* Not all systems that can host afs servers have ENOTEMPTY. */
+ if (ret == -EEXIST)
+ ret = -ENOTEMPTY;
+out:
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
- return afs_put_operation(op);
+ ret = afs_put_operation(op);
+ goto out;
}
/*
@@ -1537,16 +1519,19 @@ static void afs_unlink_success(struct afs_operation *op)
static void afs_unlink_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_remove(dvnode, &op->dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_unlink_put(struct afs_operation *op)
@@ -1585,6 +1570,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1631,10 +1618,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
afs_wait_for_operation(op);
}
- return afs_put_operation(op);
-
error:
- return afs_put_operation(op);
+ ret = afs_put_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
}
static const struct afs_operation_ops afs_create_operation = {
@@ -1668,6 +1655,8 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1678,7 +1667,9 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
op->create.reason = afs_edit_dir_for_create;
op->mtime = current_time(dir);
op->ops = &afs_create_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
d_drop(dentry);
@@ -1743,6 +1734,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
ret = afs_validate(vnode, op->key);
if (ret < 0)
goto error_op;
@@ -1758,10 +1751,13 @@ static int afs_link(struct dentry *from, struct inode *dir,
op->dentry_2 = from;
op->ops = &afs_link_operation;
op->create.reason = afs_edit_dir_for_link;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error_op:
afs_put_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
error:
d_drop(dentry);
_leave(" = %d", ret);
@@ -1805,6 +1801,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
@@ -1813,7 +1811,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
op->create.reason = afs_edit_dir_for_symlink;
op->create.symlink = content;
op->mtime = current_time(dir);
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
d_drop(dentry);
@@ -1823,6 +1823,8 @@ error:
static void afs_rename_success(struct afs_operation *op)
{
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+
_enter("op=%08x", op->debug_id);
op->ctime = op->file[0].scb.status.mtime_client;
@@ -1832,10 +1834,28 @@ static void afs_rename_success(struct afs_operation *op)
op->ctime = op->file[1].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[1]);
}
+
+ /* If we're moving a subdir between dirs, we need to update
+ * its DV counter too as the ".." will be altered.
+ */
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ op->file[0].vnode != op->file[1].vnode) {
+ u64 new_dv;
+
+ write_seqlock(&vnode->cb_lock);
+
+ new_dv = vnode->status.data_version + 1;
+ trace_afs_set_dv(vnode, new_dv);
+ vnode->status.data_version = new_dv;
+ inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
+
+ write_sequnlock(&vnode->cb_lock);
+ }
}
static void afs_rename_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources orig_cres = {}, new_cres = {};
struct afs_vnode_param *orig_dvp = &op->file[0];
struct afs_vnode_param *new_dvp = &op->file[1];
struct afs_vnode *orig_dvnode = orig_dvp->vnode;
@@ -1852,6 +1872,10 @@ static void afs_rename_edit_dir(struct afs_operation *op)
op->rename.rehash = NULL;
}
+ fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode));
+ if (new_dvnode != orig_dvnode)
+ fscache_begin_write_operation(&new_cres, afs_vnode_cache(new_dvnode));
+
down_write(&orig_dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
@@ -1873,6 +1897,12 @@ static void afs_rename_edit_dir(struct afs_operation *op)
&vnode->fid, afs_edit_dir_for_rename_2);
}
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ new_dvnode != orig_dvnode &&
+ test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ afs_edit_dir_update_dotdot(vnode, new_dvnode,
+ afs_edit_dir_for_rename_sub);
+
new_inode = d_inode(new_dentry);
if (new_inode) {
spin_lock(&new_inode->i_lock);
@@ -1895,6 +1925,9 @@ static void afs_rename_edit_dir(struct afs_operation *op)
d_move(old_dentry, new_dentry);
up_write(&new_dvnode->validate_lock);
+ fscache_end_operation(&orig_cres);
+ if (new_dvnode != orig_dvnode)
+ fscache_end_operation(&new_cres);
}
static void afs_rename_put(struct afs_operation *op)
@@ -1947,6 +1980,10 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(orig_dvnode), true);
+ if (new_dvnode != orig_dvnode)
+ fscache_use_cookie(afs_vnode_cache(new_dvnode), true);
+
ret = afs_validate(vnode, op->key);
afs_op_set_error(op, ret);
if (ret < 0)
@@ -2014,47 +2051,43 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
*/
d_drop(old_dentry);
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+out:
+ afs_dir_unuse_cookie(orig_dvnode, ret);
+ if (new_dvnode != orig_dvnode)
+ afs_dir_unuse_cookie(new_dvnode, ret);
+ return ret;
error:
- return afs_put_operation(op);
-}
-
-/*
- * Release a directory folio and clean up its private state if it's not busy
- * - return true if the folio can now be released, false if not
- */
-static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
-{
- struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index);
-
- folio_detach_private(folio);
-
- /* The directory will need reloading. */
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_relpg);
- return true;
+ ret = afs_put_operation(op);
+ goto out;
}
/*
- * Invalidate part or all of a folio.
+ * Write the file contents to the cache as a single blob.
*/
-static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- size_t length)
+int afs_single_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{%lu},%zu,%zu", folio->index, offset, length);
-
- BUG_ON(!folio_test_locked(folio));
+ struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
+ int ret = 0;
- /* The directory will need reloading. */
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_inval);
+ /* Need to lock to prevent the folio queue and folios from being thrown
+ * away.
+ */
+ down_read(&dvnode->validate_lock);
+
+ if (is_dir ?
+ test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
+ atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
+ i_size_read(&dvnode->netfs.inode));
+ ret = netfs_writeback_single(mapping, wbc, &iter);
+ }
- /* we clean up only if the entire folio is being invalidated */
- if (offset == 0 && length == folio_size(folio))
- folio_detach_private(folio);
+ up_read(&dvnode->validate_lock);
+ return ret;
}
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index e2fa577b66fe..60a549f1d9c5 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -10,6 +10,7 @@
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/iversion.h>
+#include <linux/folio_queue.h>
#include "internal.h"
#include "xdr_fs.h"
@@ -105,32 +106,66 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
}
/*
- * Get a new directory folio.
+ * Get a specific block, extending the directory storage to cover it as needed.
*/
-static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
+static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block)
{
- struct address_space *mapping = vnode->netfs.inode.i_mapping;
+ struct folio_queue *fq;
+ struct afs_vnode *dvnode = iter->dvnode;
struct folio *folio;
+ size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+ size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+ int ret;
+
+ if (dvnode->directory_size < blend) {
+ size_t cur_size = dvnode->directory_size;
+
+ ret = netfs_alloc_folioq_buffer(
+ NULL, &dvnode->directory, &cur_size, blend,
+ mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ dvnode->directory_size = cur_size;
+ if (ret < 0)
+ goto fail;
+ }
- folio = __filemap_get_folio(mapping, index,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- mapping->gfp_mask);
- if (IS_ERR(folio)) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- return NULL;
+ fq = iter->fq;
+ if (!fq)
+ fq = dvnode->directory;
+
+ /* Search the folio queue for the folio containing the block... */
+ for (; fq; fq = fq->next) {
+ for (int s = iter->fq_slot; s < folioq_count(fq); s++) {
+ size_t fsize = folioq_folio_size(fq, s);
+
+ if (blend <= fpos + fsize) {
+ /* ... and then return the mapped block. */
+ folio = folioq_folio(fq, s);
+ if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+ goto fail;
+ iter->fq = fq;
+ iter->fq_slot = s;
+ iter->fpos = fpos;
+ return kmap_local_folio(folio, blpos - fpos);
+ }
+ fpos += fsize;
+ }
+ iter->fq_slot = 0;
}
- if (!folio_test_private(folio))
- folio_attach_private(folio, (void *)1);
- return folio;
+
+fail:
+ iter->fq = NULL;
+ iter->fq_slot = 0;
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+ return NULL;
}
/*
* Scan a directory block looking for a dirent of the right name.
*/
-static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
+static int afs_dir_scan_block(const union afs_xdr_dir_block *block, const struct qstr *name,
unsigned int blocknum)
{
- union afs_xdr_dirent *de;
+ const union afs_xdr_dirent *de;
u64 bitmap;
int d, len, n;
@@ -209,9 +244,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
{
union afs_xdr_dir_block *meta, *block;
union afs_xdr_dirent *de;
- struct folio *folio0, *folio;
- unsigned int need_slots, nr_blocks, b;
- pgoff_t index;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ unsigned int nr_blocks, b, entry;
loff_t i_size;
int slot;
@@ -220,20 +254,17 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
i_size = i_size_read(&vnode->netfs.inode);
if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_bad_size);
return;
}
- folio0 = afs_dir_get_folio(vnode, 0);
- if (!folio0) {
- _leave(" [fgp]");
+ meta = afs_dir_get_block(&iter, 0);
+ if (!meta)
return;
- }
/* Work out how many slots we're going to need. */
- need_slots = afs_dir_calc_slots(name->len);
+ iter.nr_slots = afs_dir_calc_slots(name->len);
- meta = kmap_local_folio(folio0, 0);
if (i_size == 0)
goto new_directory;
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
@@ -245,22 +276,21 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
/* If the directory extended into a new folio, then we need to
* tack a new folio on the end.
*/
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
- goto error;
- if (index >= folio_nr_pages(folio0)) {
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
- goto error;
- } else {
- folio = folio0;
- }
+ goto error_too_many_blocks;
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+ /* Lower dir blocks have a counter in the header we can check. */
+ if (b < AFS_DIR_BLOCKS_WITH_CTR &&
+ meta->meta.alloc_ctrs[b] < iter.nr_slots)
+ continue;
+
+ block = afs_dir_get_block(&iter, b);
+ if (!block)
+ goto error;
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
+ goto already_invalidated;
_debug("block %u: %2u %3u %u",
b,
@@ -275,31 +305,23 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
}
- /* Only lower dir blocks have a counter in the header. */
- if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
- meta->meta.alloc_ctrs[b] >= need_slots) {
- /* We need to try and find one or more consecutive
- * slots to hold the entry.
- */
- slot = afs_find_contig_bits(block, need_slots);
- if (slot >= 0) {
- _debug("slot %u", slot);
- goto found_space;
- }
+ /* We need to try and find one or more consecutive slots to
+ * hold the entry.
+ */
+ slot = afs_find_contig_bits(block, iter.nr_slots);
+ if (slot >= 0) {
+ _debug("slot %u", slot);
+ goto found_space;
}
kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
}
/* There are no spare slots of sufficient size, yet the operation
* succeeded. Download the directory again.
*/
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_no_slots);
goto out_unmap;
new_directory:
@@ -307,8 +329,7 @@ new_directory:
i_size = AFS_DIR_BLOCK_SIZE;
afs_set_i_size(vnode, i_size);
slot = AFS_DIR_RESV_BLOCKS0;
- folio = folio0;
- block = kmap_local_folio(folio, 0);
+ block = afs_dir_get_block(&iter, 0);
nr_blocks = 1;
b = 0;
@@ -326,41 +347,39 @@ found_space:
de->u.name[name->len] = 0;
/* Adjust the bitmap. */
- afs_set_contig_bits(block, slot, need_slots);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ afs_set_contig_bits(block, slot, iter.nr_slots);
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
- meta->meta.alloc_ctrs[b] -= need_slots;
+ meta->meta.alloc_ctrs[b] -= iter.nr_slots;
+
+ /* Adjust the hash chain. */
+ entry = b * AFS_DIR_SLOTS_PER_BLOCK + slot;
+ iter.bucket = afs_dir_hash_name(name);
+ de->u.hash_next = meta->meta.hashtable[iter.bucket];
+ meta->meta.hashtable[iter.bucket] = htons(entry);
+ kunmap_local(block);
inode_inc_iversion_raw(&vnode->netfs.inode);
afs_stat_v(vnode, n_dir_cr);
_debug("Insert %s in %u[%u]", name->name, b, slot);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+
out_unmap:
kunmap_local(meta);
- folio_unlock(folio0);
- folio_put(folio0);
_leave("");
return;
-invalidated:
+already_invalidated:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
goto out_unmap;
+error_too_many_blocks:
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_too_many_blocks);
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
@@ -374,13 +393,14 @@ error:
void afs_edit_dir_remove(struct afs_vnode *vnode,
struct qstr *name, enum afs_edit_dir_reason why)
{
- union afs_xdr_dir_block *meta, *block;
- union afs_xdr_dirent *de;
- struct folio *folio0, *folio;
- unsigned int need_slots, nr_blocks, b;
- pgoff_t index;
+ union afs_xdr_dir_block *meta, *block, *pblock;
+ union afs_xdr_dirent *de, *pde;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ struct afs_fid fid;
+ unsigned int b, slot, entry;
loff_t i_size;
- int slot;
+ __be16 next;
+ int found;
_enter(",,{%d,%s},", name->len, name->name);
@@ -388,81 +408,95 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
if (i_size < AFS_DIR_BLOCK_SIZE ||
i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_bad_size);
return;
}
- nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
- folio0 = afs_dir_get_folio(vnode, 0);
- if (!folio0) {
- _leave(" [fgp]");
+ if (!afs_dir_init_iter(&iter, name))
return;
- }
-
- /* Work out how many slots we're going to discard. */
- need_slots = afs_dir_calc_slots(name->len);
- meta = kmap_local_folio(folio0, 0);
-
- /* Find a block that has sufficient slots available. Each folio
- * contains two or more directory blocks.
- */
- for (b = 0; b < nr_blocks; b++) {
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
- if (index >= folio_nr_pages(folio0)) {
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
- goto error;
- } else {
- folio = folio0;
- }
-
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
-
- /* Abandon the edit if we got a callback break. */
- if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
-
- if (b > AFS_DIR_BLOCKS_WITH_CTR ||
- meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
- slot = afs_dir_scan_block(block, name, b);
- if (slot >= 0)
- goto found_dirent;
- }
+ meta = afs_dir_find_block(&iter, 0);
+ if (!meta)
+ return;
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ /* Find the entry in the blob. */
+ found = afs_dir_search_bucket(&iter, name, &fid);
+ if (found < 0) {
+ /* Didn't find the dirent to clobber. Re-download. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
+ 0, 0, 0, 0, name->name);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_wrong_name);
+ goto out_unmap;
}
- /* Didn't find the dirent to clobber. Download the directory again. */
- trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
- 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- goto out_unmap;
+ entry = found;
+ b = entry / AFS_DIR_SLOTS_PER_BLOCK;
+ slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
-found_dirent:
+ block = afs_dir_find_block(&iter, b);
+ if (!block)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto already_invalidated;
+
+ /* Check and clear the entry. */
de = &block->dirents[slot];
+ if (de->u.valid != 1)
+ goto error_unmap;
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
ntohl(de->u.vnode), ntohl(de->u.unique),
name->name);
- memset(de, 0, sizeof(*de) * need_slots);
-
/* Adjust the bitmap. */
- afs_clear_contig_bits(block, slot, need_slots);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ afs_clear_contig_bits(block, slot, iter.nr_slots);
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
- meta->meta.alloc_ctrs[b] += need_slots;
+ meta->meta.alloc_ctrs[b] += iter.nr_slots;
+
+ /* Clear the constituent entries. */
+ next = de->u.hash_next;
+ memset(de, 0, sizeof(*de) * iter.nr_slots);
+ kunmap_local(block);
+
+ /* Adjust the hash chain: if iter->prev_entry is 0, the hashtable head
+ * index is previous; otherwise it's slot number of the previous entry.
+ */
+ if (!iter.prev_entry) {
+ __be16 prev_next = meta->meta.hashtable[iter.bucket];
+
+ if (unlikely(prev_next != htons(entry))) {
+ pr_warn("%llx:%llx:%x: not head of chain b=%x p=%x,%x e=%x %*s",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ iter.bucket, iter.prev_entry, prev_next, entry,
+ name->len, name->name);
+ goto error;
+ }
+ meta->meta.hashtable[iter.bucket] = next;
+ } else {
+ unsigned int pb = iter.prev_entry / AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int ps = iter.prev_entry % AFS_DIR_SLOTS_PER_BLOCK;
+ __be16 prev_next;
+
+ pblock = afs_dir_find_block(&iter, pb);
+ if (!pblock)
+ goto error;
+ pde = &pblock->dirents[ps];
+ prev_next = pde->u.hash_next;
+ if (prev_next != htons(entry)) {
+ kunmap_local(pblock);
+ pr_warn("%llx:%llx:%x: not prev in chain b=%x p=%x,%x e=%x %*s",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ iter.bucket, iter.prev_entry, prev_next, entry,
+ name->len, name->name);
+ goto error;
+ }
+ pde->u.hash_next = next;
+ kunmap_local(pblock);
+ }
+
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
afs_stat_v(vnode, n_dir_rm);
@@ -470,25 +504,145 @@ found_dirent:
out_unmap:
kunmap_local(meta);
- folio_unlock(folio0);
- folio_put(folio0);
_leave("");
return;
-invalidated:
+already_invalidated:
+ kunmap_local(block);
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
goto out_unmap;
+error_unmap:
+ kunmap_local(block);
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
+
+/*
+ * Edit a subdirectory that has been moved between directories to update the
+ * ".." entry.
+ */
+void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
+ enum afs_edit_dir_reason why)
+{
+ union afs_xdr_dir_block *block;
+ union afs_xdr_dirent *de;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ unsigned int nr_blocks, b;
+ loff_t i_size;
+ int slot;
+
+ _enter("");
+
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size < AFS_DIR_BLOCK_SIZE) {
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size);
+ return;
+ }
+
+ nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+ /* Find a block that has sufficient slots available. Each folio
+ * contains two or more directory blocks.
+ */
+ for (b = 0; b < nr_blocks; b++) {
+ block = afs_dir_get_block(&iter, b);
+ if (!block)
+ goto error;
+
+ /* Abandon the edit if we got a callback break. */
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto already_invalidated;
+
+ slot = afs_dir_scan_block(block, &dotdot_name, b);
+ if (slot >= 0)
+ goto found_dirent;
+
+ kunmap_local(block);
+ }
+
+ /* Didn't find the dirent to clobber. Download the directory again. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
+ 0, 0, 0, 0, "..");
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_no_dd);
+ goto out;
+
+found_dirent:
+ de = &block->dirents[slot];
+ de->u.vnode = htonl(new_dvnode->fid.vnode);
+ de->u.unique = htonl(new_dvnode->fid.unique);
+
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_dd, b, slot,
+ ntohl(de->u.vnode), ntohl(de->u.unique), "..");
+
+ kunmap_local(block);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+ inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+
+out:
+ _leave("");
+ return;
+
+already_invalidated:
+ kunmap_local(block);
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
+ 0, 0, 0, 0, "..");
+ goto out;
+
+error:
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
+ 0, 0, 0, 0, "..");
+ goto out;
+}
+
+/*
+ * Initialise a new directory. We need to fill in the "." and ".." entries.
+ */
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_dvnode)
+{
+ union afs_xdr_dir_block *meta;
+ struct afs_dir_iter iter = { .dvnode = dvnode };
+ union afs_xdr_dirent *de;
+ unsigned int slot = AFS_DIR_RESV_BLOCKS0;
+ loff_t i_size;
+
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size != AFS_DIR_BLOCK_SIZE) {
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_add_bad_size);
+ return;
+ }
+
+ meta = afs_dir_get_block(&iter, 0);
+ if (!meta)
+ return;
+
+ afs_edit_init_block(meta, meta, 0);
+
+ de = &meta->dirents[slot];
+ de->u.valid = 1;
+ de->u.vnode = htonl(dvnode->fid.vnode);
+ de->u.unique = htonl(dvnode->fid.unique);
+ memcpy(de->u.name, ".", 2);
+ trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+ dvnode->fid.vnode, dvnode->fid.unique, ".");
+ slot++;
+
+ de = &meta->dirents[slot];
+ de->u.valid = 1;
+ de->u.vnode = htonl(parent_dvnode->fid.vnode);
+ de->u.unique = htonl(parent_dvnode->fid.unique);
+ memcpy(de->u.name, "..", 3);
+ trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+ parent_dvnode->fid.vnode, parent_dvnode->fid.unique, "..");
+
+ afs_set_contig_bits(meta, AFS_DIR_RESV_BLOCKS0, 2);
+ meta->meta.alloc_ctrs[0] -= 2;
+ kunmap_local(meta);
+
+ netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
+ set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+}
diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c
new file mode 100644
index 000000000000..b25bd892db4d
--- /dev/null
+++ b/fs/afs/dir_search.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Search a directory's hash table.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/draft-keiser-afs3-directory-object-00
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+
+/*
+ * Calculate the name hash.
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name)
+{
+ const unsigned char *p = name->name;
+ unsigned int hash = 0, i;
+ int bucket;
+
+ for (i = 0; i < name->len; i++)
+ hash = (hash * 173) + p[i];
+ bucket = hash & (AFS_DIR_HASHTBL_SIZE - 1);
+ if (hash > INT_MAX) {
+ bucket = AFS_DIR_HASHTBL_SIZE - bucket;
+ bucket &= (AFS_DIR_HASHTBL_SIZE - 1);
+ }
+ return bucket;
+}
+
+/*
+ * Reset a directory iterator.
+ */
+static bool afs_dir_reset_iter(struct afs_dir_iter *iter)
+{
+ unsigned long long i_size = i_size_read(&iter->dvnode->netfs.inode);
+ unsigned int nblocks;
+
+ /* Work out the maximum number of steps we can take. */
+ nblocks = umin(i_size / AFS_DIR_BLOCK_SIZE, AFS_DIR_MAX_BLOCKS);
+ if (!nblocks)
+ return false;
+ iter->loop_check = nblocks * (AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS);
+ iter->prev_entry = 0; /* Hash head is previous */
+ return true;
+}
+
+/*
+ * Initialise a directory iterator for looking up a name.
+ */
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name)
+{
+ iter->nr_slots = afs_dir_calc_slots(name->len);
+ iter->bucket = afs_dir_hash_name(name);
+ return afs_dir_reset_iter(iter);
+}
+
+/*
+ * Get a specific block.
+ */
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block)
+{
+ struct folio_queue *fq = iter->fq;
+ struct afs_vnode *dvnode = iter->dvnode;
+ struct folio *folio;
+ size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+ size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+ int slot = iter->fq_slot;
+
+ _enter("%zx,%d", block, slot);
+
+ if (iter->block) {
+ kunmap_local(iter->block);
+ iter->block = NULL;
+ }
+
+ if (dvnode->directory_size < blend)
+ goto fail;
+
+ if (!fq || blpos < fpos) {
+ fq = dvnode->directory;
+ slot = 0;
+ fpos = 0;
+ }
+
+ /* Search the folio queue for the folio containing the block... */
+ for (; fq; fq = fq->next) {
+ for (; slot < folioq_count(fq); slot++) {
+ size_t fsize = folioq_folio_size(fq, slot);
+
+ if (blend <= fpos + fsize) {
+ /* ... and then return the mapped block. */
+ folio = folioq_folio(fq, slot);
+ if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+ goto fail;
+ iter->fq = fq;
+ iter->fq_slot = slot;
+ iter->fpos = fpos;
+ iter->block = kmap_local_folio(folio, blpos - fpos);
+ return iter->block;
+ }
+ fpos += fsize;
+ }
+ slot = 0;
+ }
+
+fail:
+ iter->fq = NULL;
+ iter->fq_slot = 0;
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+ return NULL;
+}
+
+/*
+ * Search through a directory bucket.
+ */
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+ struct afs_fid *_fid)
+{
+ const union afs_xdr_dir_block *meta;
+ unsigned int entry;
+ int ret = -ESTALE;
+
+ meta = afs_dir_find_block(iter, 0);
+ if (!meta)
+ return -ESTALE;
+
+ entry = ntohs(meta->meta.hashtable[iter->bucket & (AFS_DIR_HASHTBL_SIZE - 1)]);
+ _enter("%x,%x", iter->bucket, entry);
+
+ while (entry) {
+ const union afs_xdr_dir_block *block;
+ const union afs_xdr_dirent *dire;
+ unsigned int blnum = entry / AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int resv = (blnum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+
+ _debug("search %x", entry);
+
+ if (slot < resv) {
+ kdebug("slot out of range h=%x rs=%2x sl=%2x-%2x",
+ iter->bucket, resv, slot, slot + iter->nr_slots - 1);
+ goto bad;
+ }
+
+ block = afs_dir_find_block(iter, blnum);
+ if (!block)
+ goto bad;
+ dire = &block->dirents[slot];
+
+ if (slot + iter->nr_slots <= AFS_DIR_SLOTS_PER_BLOCK &&
+ memcmp(dire->u.name, name->name, name->len) == 0 &&
+ dire->u.name[name->len] == '\0') {
+ _fid->vnode = ntohl(dire->u.vnode);
+ _fid->unique = ntohl(dire->u.unique);
+ ret = entry;
+ goto found;
+ }
+
+ iter->prev_entry = entry;
+ entry = ntohs(dire->u.hash_next);
+ if (!--iter->loop_check) {
+ kdebug("dir chain loop h=%x", iter->bucket);
+ goto bad;
+ }
+ }
+
+ ret = -ENOENT;
+found:
+ if (iter->block) {
+ kunmap_local(iter->block);
+ iter->block = NULL;
+ }
+
+bad:
+ if (ret == -ESTALE)
+ afs_invalidate_dir(iter->dvnode, afs_dir_invalid_iter_stale);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Search the appropriate hash chain in the contents of an AFS directory.
+ */
+int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
+ struct afs_fid *_fid, afs_dataversion_t *_dir_version)
+{
+ struct afs_dir_iter iter = { .dvnode = dvnode, };
+ int ret, retry_limit = 3;
+
+ _enter("{%lu},,,", dvnode->netfs.inode.i_ino);
+
+ if (!afs_dir_init_iter(&iter, name))
+ return -ENOENT;
+ do {
+ if (--retry_limit < 0) {
+ pr_warn("afs_read_dir(): Too many retries\n");
+ ret = -ESTALE;
+ break;
+ }
+ ret = afs_read_dir(dvnode, NULL);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ break;
+ if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
+ ret = -ESTALE;
+ break;
+ }
+ continue;
+ }
+ *_dir_version = inode_peek_iversion_raw(&dvnode->netfs.inode);
+
+ ret = afs_dir_search_bucket(&iter, name, _fid);
+ up_read(&dvnode->validate_lock);
+ if (ret == -ESTALE)
+ afs_dir_reset_iter(&iter);
+ } while (ret == -ESTALE);
+
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index a1e581946b93..0b80eb93fa40 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -113,16 +113,14 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
sdentry = NULL;
do {
- int slen;
-
dput(sdentry);
sillycounter++;
/* Create a silly name. Note that the ".__afs" prefix is
* understood by the salvager and must not be changed.
*/
- slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+ sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
/* N.B. Better to return EBUSY here ... it could be dangerous
* to delete the file while it's in use.
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index c4d2711e20ad..8c6130789fde 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -10,16 +10,19 @@
#include <linux/dns_resolver.h>
#include "internal.h"
-static atomic_t afs_autocell_ino;
+#define AFS_MIN_DYNROOT_CELL_INO 4 /* Allow for ., .., @cell, .@cell */
+#define AFS_MAX_DYNROOT_CELL_INO ((unsigned int)INT_MAX)
+
+static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino);
/*
* iget5() comparator for inode created by autocell operations
- *
- * These pseudo inodes don't match anything.
*/
static int afs_iget5_pseudo_test(struct inode *inode, void *opaque)
{
- return 0;
+ struct afs_fid *fid = opaque;
+
+ return inode->i_ino == fid->vnode;
}
/*
@@ -39,28 +42,16 @@ static int afs_iget5_pseudo_set(struct inode *inode, void *opaque)
}
/*
- * Create an inode for a dynamic root directory or an autocell dynamic
- * automount dir.
+ * Create an inode for an autocell dynamic automount dir.
*/
-struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
+static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino)
{
- struct afs_super_info *as = AFS_FS_S(sb);
struct afs_vnode *vnode;
struct inode *inode;
- struct afs_fid fid = {};
+ struct afs_fid fid = { .vnode = ino, .unique = 1, };
_enter("");
- if (as->volume)
- fid.vid = as->volume->vid;
- if (root) {
- fid.vnode = 1;
- fid.unique = 1;
- } else {
- fid.vnode = atomic_inc_return(&afs_autocell_ino);
- fid.unique = 0;
- }
-
inode = iget5_locked(sb, fid.vnode,
afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
if (!inode) {
@@ -73,163 +64,75 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
vnode = AFS_FS_I(inode);
- /* there shouldn't be an existing inode */
- BUG_ON(!(inode->i_state & I_NEW));
-
- netfs_inode_init(&vnode->netfs, NULL, false);
- inode->i_size = 0;
- inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
- if (root) {
- inode->i_op = &afs_dynroot_inode_operations;
- inode->i_fop = &simple_dir_operations;
- } else {
- inode->i_op = &afs_autocell_inode_operations;
- }
- set_nlink(inode, 2);
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- simple_inode_init_ts(inode);
- inode->i_blocks = 0;
- inode->i_generation = 0;
-
- set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
- if (!root) {
+ if (inode->i_state & I_NEW) {
+ netfs_inode_init(&vnode->netfs, NULL, false);
+ simple_inode_init_ts(inode);
+ set_nlink(inode, 2);
+ inode->i_size = 0;
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_autocell_inode_operations;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_generation = 0;
+ inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
+
+ set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
- inode->i_flags |= S_AUTOMOUNT;
- }
- inode->i_flags |= S_NOATIME;
- unlock_new_inode(inode);
+ unlock_new_inode(inode);
+ }
_leave(" = %p", inode);
return inode;
}
/*
- * Probe to see if a cell may exist. This prevents positive dentries from
- * being created unnecessarily.
+ * Try to automount the mountpoint with pseudo directory, if the autocell
+ * option is set.
*/
-static int afs_probe_cell_name(struct dentry *dentry)
+static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
- struct afs_cell *cell;
+ struct afs_cell *cell = NULL;
struct afs_net *net = afs_d2net(dentry);
+ struct inode *inode = NULL;
const char *name = dentry->d_name.name;
size_t len = dentry->d_name.len;
- char *result = NULL;
- int ret;
+ bool dotted = false;
+ int ret = -ENOENT;
/* Names prefixed with a dot are R/W mounts. */
if (name[0] == '.') {
- if (len == 1)
- return -EINVAL;
name++;
len--;
+ dotted = true;
}
- cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe);
- if (!IS_ERR(cell)) {
- afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe);
- return 0;
- }
-
- ret = dns_query(net->net, "afsdb", name, len, "srv=1",
- &result, NULL, false);
- if (ret == -ENODATA || ret == -ENOKEY || ret == 0)
- ret = -ENOENT;
- if (ret > 0 && ret >= sizeof(struct dns_server_list_v1_header)) {
- struct dns_server_list_v1_header *v1 = (void *)result;
-
- if (v1->hdr.zero == 0 &&
- v1->hdr.content == DNS_PAYLOAD_IS_SERVER_LIST &&
- v1->hdr.version == 1 &&
- (v1->status != DNS_LOOKUP_GOOD &&
- v1->status != DNS_LOOKUP_GOOD_WITH_BAD))
- return -ENOENT;
-
+ cell = afs_lookup_cell(net, name, len, NULL, false,
+ afs_cell_trace_use_lookup_dynroot);
+ if (IS_ERR(cell)) {
+ ret = PTR_ERR(cell);
+ goto out_no_cell;
}
- kfree(result);
- return ret;
-}
-
-/*
- * Try to auto mount the mountpoint with pseudo directory, if the autocell
- * operation is setted.
- */
-struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
-{
- struct afs_vnode *vnode = AFS_FS_I(dir);
- struct inode *inode;
- int ret = -ENOENT;
-
- _enter("%p{%pd}, {%llx:%llu}",
- dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
-
- if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
- goto out;
-
- ret = afs_probe_cell_name(dentry);
- if (ret < 0)
- goto out;
-
- inode = afs_iget_pseudo_dir(dir->i_sb, false);
+ inode = afs_iget_pseudo_dir(dir->i_sb, cell->dynroot_ino * 2 + dotted);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto out;
}
- _leave("= %p", inode);
- return inode;
+ dentry->d_fsdata = cell;
+ return d_splice_alias(inode, dentry);
out:
- _leave("= %d", ret);
+ afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_dynroot);
+out_no_cell:
+ if (!inode)
+ return d_splice_alias(inode, dentry);
return ret == -ENOENT ? NULL : ERR_PTR(ret);
}
/*
- * Look up @cell in a dynroot directory. This is a substitution for the
- * local cell name for the net namespace.
- */
-static struct dentry *afs_lookup_atcell(struct dentry *dentry)
-{
- struct afs_cell *cell;
- struct afs_net *net = afs_d2net(dentry);
- struct dentry *ret;
- char *name;
- int len;
-
- if (!net->ws_cell)
- return ERR_PTR(-ENOENT);
-
- ret = ERR_PTR(-ENOMEM);
- name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL);
- if (!name)
- goto out_p;
-
- down_read(&net->cells_lock);
- cell = net->ws_cell;
- if (cell) {
- len = cell->name_len;
- memcpy(name, cell->name, len + 1);
- }
- up_read(&net->cells_lock);
-
- ret = ERR_PTR(-ENOENT);
- if (!cell)
- goto out_n;
-
- ret = lookup_one_len(name, dentry->d_parent, len);
-
- /* We don't want to d_add() the @cell dentry here as we don't want to
- * the cached dentry to hide changes to the local cell name.
- */
-
-out_n:
- kfree(name);
-out_p:
- return ret;
-}
-
-/*
* Look up an entry in a dynroot directory.
*/
static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
@@ -237,8 +140,6 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
{
_enter("%pd", dentry);
- ASSERTCMP(d_inode(dentry), ==, NULL);
-
if (flags & LOOKUP_CREATE)
return ERR_PTR(-EOPNOTSUPP);
@@ -249,141 +150,256 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
if (dentry->d_name.len == 5 &&
memcmp(dentry->d_name.name, "@cell", 5) == 0)
- return afs_lookup_atcell(dentry);
+ return afs_lookup_atcell(dir, dentry, 2);
+
+ if (dentry->d_name.len == 6 &&
+ memcmp(dentry->d_name.name, ".@cell", 6) == 0)
+ return afs_lookup_atcell(dir, dentry, 3);
- return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry);
+ return afs_dynroot_lookup_cell(dir, dentry, flags);
}
const struct inode_operations afs_dynroot_inode_operations = {
.lookup = afs_dynroot_lookup,
};
+static void afs_dynroot_d_release(struct dentry *dentry)
+{
+ struct afs_cell *cell = dentry->d_fsdata;
+
+ afs_unuse_cell(cell, afs_cell_trace_unuse_dynroot_mntpt);
+}
+
+/*
+ * Keep @cell symlink dentries around, but only keep cell autodirs when they're
+ * being used.
+ */
+static int afs_dynroot_delete_dentry(const struct dentry *dentry)
+{
+ const struct qstr *name = &dentry->d_name;
+
+ if (name->len == 5 && memcmp(name->name, "@cell", 5) == 0)
+ return 0;
+ if (name->len == 6 && memcmp(name->name, ".@cell", 6) == 0)
+ return 0;
+ return 1;
+}
+
const struct dentry_operations afs_dynroot_dentry_operations = {
- .d_delete = always_delete_dentry,
- .d_release = afs_d_release,
+ .d_delete = afs_dynroot_delete_dentry,
+ .d_release = afs_dynroot_d_release,
.d_automount = afs_d_automount,
};
+static void afs_atcell_delayed_put_cell(void *arg)
+{
+ struct afs_cell *cell = arg;
+
+ afs_put_cell(cell, afs_cell_trace_put_atcell);
+}
+
/*
- * Create a manually added cell mount directory.
- * - The caller must hold net->proc_cells_lock
+ * Read @cell or .@cell symlinks.
*/
-int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell)
+static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
{
- struct super_block *sb = net->dynroot_sb;
- struct dentry *root, *subdir;
- int ret;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_cell *cell;
+ struct afs_net *net = afs_i2net(inode);
+ const char *name;
+ bool dotted = vnode->fid.vnode == 3;
- if (!sb || atomic_read(&sb->s_active) == 0)
- return 0;
+ if (!rcu_access_pointer(net->ws_cell))
+ return ERR_PTR(-ENOENT);
- /* Let the ->lookup op do the creation */
- root = sb->s_root;
- inode_lock(root->d_inode);
- subdir = lookup_one_len(cell->name, root, cell->name_len);
- if (IS_ERR(subdir)) {
- ret = PTR_ERR(subdir);
- goto unlock;
+ if (!dentry) {
+ /* We're in RCU-pathwalk. */
+ cell = rcu_dereference(net->ws_cell);
+ if (dotted)
+ name = cell->name - 1;
+ else
+ name = cell->name;
+ /* Shouldn't need to set a delayed call. */
+ return name;
}
- /* Note that we're retaining an extra ref on the dentry */
- subdir->d_fsdata = (void *)1UL;
- ret = 0;
-unlock:
- inode_unlock(root->d_inode);
- return ret;
+ down_read(&net->cells_lock);
+
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
+ if (dotted)
+ name = cell->name - 1;
+ else
+ name = cell->name;
+ afs_get_cell(cell, afs_cell_trace_get_atcell);
+ set_delayed_call(done, afs_atcell_delayed_put_cell, cell);
+
+ up_read(&net->cells_lock);
+ return name;
}
+static const struct inode_operations afs_atcell_inode_operations = {
+ .get_link = afs_atcell_get_link,
+};
+
/*
- * Remove a manually added cell mount directory.
- * - The caller must hold net->proc_cells_lock
+ * Create an inode for the @cell or .@cell symlinks.
*/
-void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell)
+static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino)
{
- struct super_block *sb = net->dynroot_sb;
- struct dentry *root, *subdir;
+ struct afs_vnode *vnode;
+ struct inode *inode;
+ struct afs_fid fid = { .vnode = ino, .unique = 1, };
- if (!sb || atomic_read(&sb->s_active) == 0)
- return;
+ inode = iget5_locked(dir->i_sb, fid.vnode,
+ afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
- root = sb->s_root;
- inode_lock(root->d_inode);
+ vnode = AFS_FS_I(inode);
- /* Don't want to trigger a lookup call, which will re-add the cell */
- subdir = try_lookup_one_len(cell->name, root, cell->name_len);
- if (IS_ERR_OR_NULL(subdir)) {
- _debug("lookup %ld", PTR_ERR(subdir));
- goto no_dentry;
+ if (inode->i_state & I_NEW) {
+ netfs_inode_init(&vnode->netfs, NULL, false);
+ simple_inode_init_ts(inode);
+ set_nlink(inode, 1);
+ inode->i_size = 0;
+ inode->i_mode = S_IFLNK | 0555;
+ inode->i_op = &afs_atcell_inode_operations;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_generation = 0;
+ inode->i_flags |= S_NOATIME;
+
+ unlock_new_inode(inode);
}
+ return d_splice_alias(inode, dentry);
+}
- _debug("rmdir %pd %u", subdir, d_count(subdir));
+/*
+ * Transcribe the cell database into readdir content under the RCU read lock.
+ * Each cell produces two entries, one prefixed with a dot and one not.
+ */
+static int afs_dynroot_readdir_cells(struct afs_net *net, struct dir_context *ctx)
+{
+ const struct afs_cell *cell;
+ loff_t newpos;
+
+ _enter("%llu", ctx->pos);
+
+ for (;;) {
+ unsigned int ix = ctx->pos >> 1;
+
+ cell = idr_get_next(&net->cells_dyn_ino, &ix);
+ if (!cell)
+ return 0;
+ if (READ_ONCE(cell->state) == AFS_CELL_REMOVING ||
+ READ_ONCE(cell->state) == AFS_CELL_DEAD) {
+ ctx->pos += 2;
+ ctx->pos &= ~1;
+ continue;
+ }
+
+ newpos = ix << 1;
+ if (newpos > ctx->pos)
+ ctx->pos = newpos;
- if (subdir->d_fsdata) {
- _debug("unpin %u", d_count(subdir));
- subdir->d_fsdata = NULL;
- dput(subdir);
+ _debug("pos %llu -> cell %u", ctx->pos, cell->dynroot_ino);
+
+ if ((ctx->pos & 1) == 0) {
+ if (!dir_emit(ctx, cell->name, cell->name_len,
+ cell->dynroot_ino, DT_DIR))
+ return 0;
+ ctx->pos++;
+ }
+ if ((ctx->pos & 1) == 1) {
+ if (!dir_emit(ctx, cell->name - 1, cell->name_len + 1,
+ cell->dynroot_ino + 1, DT_DIR))
+ return 0;
+ ctx->pos++;
+ }
}
- dput(subdir);
-no_dentry:
- inode_unlock(root->d_inode);
- _leave("");
+ return 0;
}
/*
- * Populate a newly created dynamic root with cell names.
+ * Read the AFS dynamic root directory. This produces a list of cellnames,
+ * dotted and undotted, along with @cell and .@cell links if configured.
*/
-int afs_dynroot_populate(struct super_block *sb)
+static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx)
{
- struct afs_cell *cell;
- struct afs_net *net = afs_sb2net(sb);
- int ret;
+ struct afs_net *net = afs_d2net(file->f_path.dentry);
+ int ret = 0;
- mutex_lock(&net->proc_cells_lock);
+ if (!dir_emit_dots(file, ctx))
+ return 0;
- net->dynroot_sb = sb;
- hlist_for_each_entry(cell, &net->proc_cells, proc_link) {
- ret = afs_dynroot_mkdir(net, cell);
- if (ret < 0)
- goto error;
+ if (ctx->pos == 2) {
+ if (rcu_access_pointer(net->ws_cell) &&
+ !dir_emit(ctx, "@cell", 5, 2, DT_LNK))
+ return 0;
+ ctx->pos = 3;
+ }
+ if (ctx->pos == 3) {
+ if (rcu_access_pointer(net->ws_cell) &&
+ !dir_emit(ctx, ".@cell", 6, 3, DT_LNK))
+ return 0;
+ ctx->pos = 4;
}
- ret = 0;
-out:
- mutex_unlock(&net->proc_cells_lock);
+ if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) {
+ down_read(&net->cells_lock);
+ ret = afs_dynroot_readdir_cells(net, ctx);
+ up_read(&net->cells_lock);
+ }
return ret;
-
-error:
- net->dynroot_sb = NULL;
- goto out;
}
+static const struct file_operations afs_dynroot_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = afs_dynroot_readdir,
+ .fsync = noop_fsync,
+};
+
/*
- * When a dynamic root that's in the process of being destroyed, depopulate it
- * of pinned directories.
+ * Create an inode for a dynamic root directory.
*/
-void afs_dynroot_depopulate(struct super_block *sb)
+struct inode *afs_dynroot_iget_root(struct super_block *sb)
{
- struct afs_net *net = afs_sb2net(sb);
- struct dentry *root = sb->s_root, *subdir;
-
- /* Prevent more subdirs from being created */
- mutex_lock(&net->proc_cells_lock);
- if (net->dynroot_sb == sb)
- net->dynroot_sb = NULL;
- mutex_unlock(&net->proc_cells_lock);
-
- if (root) {
- struct hlist_node *n;
- inode_lock(root->d_inode);
-
- /* Remove all the pins for dirs created for manually added cells */
- hlist_for_each_entry_safe(subdir, n, &root->d_children, d_sib) {
- if (subdir->d_fsdata) {
- subdir->d_fsdata = NULL;
- dput(subdir);
- }
- }
+ struct afs_super_info *as = AFS_FS_S(sb);
+ struct afs_vnode *vnode;
+ struct inode *inode;
+ struct afs_fid fid = { .vid = 0, .vnode = 1, .unique = 1,};
+
+ if (as->volume)
+ fid.vid = as->volume->vid;
- inode_unlock(root->d_inode);
+ inode = iget5_locked(sb, fid.vnode,
+ afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ vnode = AFS_FS_I(inode);
+
+ /* there shouldn't be an existing inode */
+ if (inode->i_state & I_NEW) {
+ netfs_inode_init(&vnode->netfs, NULL, false);
+ simple_inode_init_ts(inode);
+ set_nlink(inode, 2);
+ inode->i_size = 0;
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_dynroot_inode_operations;
+ inode->i_fop = &afs_dynroot_file_operations;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_generation = 0;
+ inode->i_flags |= S_NOATIME;
+
+ set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+ unlock_new_inode(inode);
}
+ _leave(" = %p", inode);
+ return inode;
}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ef2cc8f565d2..fc15497608c6 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -16,10 +16,10 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
-static int afs_symlink_read_folio(struct file *file, struct folio *folio);
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
@@ -54,20 +54,12 @@ const struct address_space_operations afs_file_aops = {
.read_folio = netfs_read_folio,
.readahead = netfs_readahead,
.dirty_folio = netfs_dirty_folio,
- .launder_folio = netfs_launder_folio,
.release_folio = netfs_release_folio,
.invalidate_folio = netfs_invalidate_folio,
.migrate_folio = filemap_migrate_folio,
.writepages = afs_writepages,
};
-const struct address_space_operations afs_symlink_aops = {
- .read_folio = afs_symlink_read_folio,
- .release_folio = netfs_release_folio,
- .invalidate_folio = netfs_invalidate_folio,
- .migrate_folio = filemap_migrate_folio,
-};
-
static const struct vm_operations_struct afs_vm_ops = {
.open = afs_vm_open,
.close = afs_vm_close,
@@ -208,47 +200,12 @@ int afs_release(struct inode *inode, struct file *file)
return ret;
}
-/*
- * Allocate a new read record.
- */
-struct afs_read *afs_alloc_read(gfp_t gfp)
-{
- struct afs_read *req;
-
- req = kzalloc(sizeof(struct afs_read), gfp);
- if (req)
- refcount_set(&req->usage, 1);
-
- return req;
-}
-
-/*
- * Dispose of a ref to a read record.
- */
-void afs_put_read(struct afs_read *req)
-{
- if (refcount_dec_and_test(&req->usage)) {
- if (req->cleanup)
- req->cleanup(req);
- key_put(req->key);
- kfree(req);
- }
-}
-
static void afs_fetch_data_notify(struct afs_operation *op)
{
- struct afs_read *req = op->fetch.req;
- struct netfs_io_subrequest *subreq = req->subreq;
- int error = afs_op_error(op);
-
- req->error = error;
- if (subreq) {
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
- req->subreq = NULL;
- } else if (req->done) {
- req->done(req);
- }
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
+
+ subreq->error = afs_op_error(op);
+ netfs_read_subreq_terminated(subreq);
}
static void afs_fetch_data_success(struct afs_operation *op)
@@ -258,103 +215,198 @@ static void afs_fetch_data_success(struct afs_operation *op)
_enter("op=%08x", op->debug_id);
afs_vnode_commit_status(op, &op->file[0]);
afs_stat_v(vnode, n_fetches);
- atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
+ atomic_long_add(op->fetch.subreq->transferred, &op->net->n_fetch_bytes);
afs_fetch_data_notify(op);
}
-static void afs_fetch_data_put(struct afs_operation *op)
+static void afs_fetch_data_aborted(struct afs_operation *op)
{
- op->fetch.req->error = afs_op_error(op);
- afs_put_read(op->fetch.req);
+ afs_check_for_remote_deletion(op);
+ afs_fetch_data_notify(op);
}
-static const struct afs_operation_ops afs_fetch_data_operation = {
+const struct afs_operation_ops afs_fetch_data_operation = {
.issue_afs_rpc = afs_fs_fetch_data,
.issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success,
- .aborted = afs_check_for_remote_deletion,
+ .aborted = afs_fetch_data_aborted,
.failed = afs_fetch_data_notify,
- .put = afs_fetch_data_put,
};
+static void afs_issue_read_call(struct afs_operation *op)
+{
+ op->call_responded = false;
+ op->call_error = 0;
+ op->call_abort_code = 0;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags))
+ yfs_fs_fetch_data(op);
+ else
+ afs_fs_fetch_data(op);
+}
+
+static void afs_end_read(struct afs_operation *op)
+{
+ if (op->call_responded && op->server)
+ set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+ if (!afs_op_error(op))
+ afs_fetch_data_success(op);
+ else if (op->cumul_error.aborted)
+ afs_fetch_data_aborted(op);
+ else
+ afs_fetch_data_notify(op);
+
+ afs_end_vnode_operation(op);
+ afs_put_operation(op);
+}
+
+/*
+ * Perform I/O processing on an asynchronous call. The work item carries a ref
+ * to the call struct that we either need to release or to pass on.
+ */
+static void afs_read_receive(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ enum afs_call_state state;
+
+ _enter("");
+
+ state = READ_ONCE(call->state);
+ if (state == AFS_CALL_COMPLETE)
+ return;
+ trace_afs_read_recv(op, call);
+
+ while (state < AFS_CALL_COMPLETE && READ_ONCE(call->need_attention)) {
+ WRITE_ONCE(call->need_attention, false);
+ afs_deliver_to_call(call);
+ state = READ_ONCE(call->state);
+ }
+
+ if (state < AFS_CALL_COMPLETE) {
+ netfs_read_subreq_progress(op->fetch.subreq);
+ if (rxrpc_kernel_check_life(call->net->socket, call->rxcall))
+ return;
+ /* rxrpc terminated the call. */
+ afs_set_call_complete(call, call->error, call->abort_code);
+ }
+
+ op->call_abort_code = call->abort_code;
+ op->call_error = call->error;
+ op->call_responded = call->responded;
+ op->call = NULL;
+ call->op = NULL;
+ afs_put_call(call);
+
+ /* If the call failed, then we need to crank the server rotation
+ * handle and try the next.
+ */
+ if (afs_select_fileserver(op)) {
+ afs_issue_read_call(op);
+ return;
+ }
+
+ afs_end_read(op);
+}
+
+void afs_fetch_data_async_rx(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
+ afs_read_receive(call);
+ afs_put_call(call);
+}
+
+void afs_fetch_data_immediate_cancel(struct afs_call *call)
+{
+ if (call->async) {
+ afs_get_call(call, afs_call_trace_wake);
+ if (!queue_work(afs_async_calls, &call->async_work))
+ afs_deferred_put_call(call);
+ flush_work(&call->async_work);
+ }
+}
+
/*
* Fetch file data from the volume.
*/
-int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
+static void afs_issue_read(struct netfs_io_subrequest *subreq)
{
struct afs_operation *op;
+ struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+ struct key *key = subreq->rreq->netfs_priv;
_enter("%s{%llx:%llu.%u},%x,,,",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- key_serial(req->key));
+ key_serial(key));
- op = afs_alloc_operation(req->key, vnode->volume);
+ op = afs_alloc_operation(key, vnode->volume);
if (IS_ERR(op)) {
- if (req->subreq)
- netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
- return PTR_ERR(op);
+ subreq->error = PTR_ERR(op);
+ netfs_read_subreq_terminated(subreq);
+ return;
}
afs_op_set_vnode(op, 0, vnode);
- op->fetch.req = afs_get_read(req);
+ op->fetch.subreq = subreq;
op->ops = &afs_fetch_data_operation;
- return afs_do_sync_operation(op);
-}
-static void afs_issue_read(struct netfs_io_subrequest *subreq)
-{
- struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
- struct afs_read *fsreq;
-
- fsreq = afs_alloc_read(GFP_NOFS);
- if (!fsreq)
- return netfs_subreq_terminated(subreq, -ENOMEM, false);
-
- fsreq->subreq = subreq;
- fsreq->pos = subreq->start + subreq->transferred;
- fsreq->len = subreq->len - subreq->transferred;
- fsreq->key = key_get(subreq->rreq->netfs_priv);
- fsreq->vnode = vnode;
- fsreq->iter = &subreq->io_iter;
-
- afs_fetch_data(fsreq->vnode, fsreq);
- afs_put_read(fsreq);
-}
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
-static int afs_symlink_read_folio(struct file *file, struct folio *folio)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host);
- struct afs_read *fsreq;
- int ret;
+ if (subreq->rreq->origin == NETFS_READAHEAD ||
+ subreq->rreq->iocb) {
+ op->flags |= AFS_OPERATION_ASYNC;
- fsreq = afs_alloc_read(GFP_NOFS);
- if (!fsreq)
- return -ENOMEM;
+ if (!afs_begin_vnode_operation(op)) {
+ subreq->error = afs_put_operation(op);
+ netfs_read_subreq_terminated(subreq);
+ return;
+ }
- fsreq->pos = folio_pos(folio);
- fsreq->len = folio_size(folio);
- fsreq->vnode = vnode;
- fsreq->iter = &fsreq->def_iter;
- iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages,
- fsreq->pos, fsreq->len);
+ if (!afs_select_fileserver(op)) {
+ afs_end_read(op);
+ return;
+ }
- ret = afs_fetch_data(fsreq->vnode, fsreq);
- if (ret == 0)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return ret;
+ afs_issue_read_call(op);
+ } else {
+ afs_do_sync_operation(op);
+ }
}
static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
+
if (file)
rreq->netfs_priv = key_get(afs_file_key(file));
rreq->rsize = 256 * 1024;
- rreq->wsize = 256 * 1024;
+ rreq->wsize = 256 * 1024 * 1024;
+
+ switch (rreq->origin) {
+ case NETFS_READ_SINGLE:
+ if (!file) {
+ struct key *key = afs_request_key(vnode->volume->cell);
+
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ rreq->netfs_priv = key;
+ }
+ break;
+ case NETFS_WRITEBACK:
+ case NETFS_WRITETHROUGH:
+ case NETFS_UNBUFFERED_WRITE:
+ case NETFS_DIO_WRITE:
+ if (S_ISREG(rreq->inode->i_mode))
+ rreq->io_streams[0].avail = true;
+ break;
+ case NETFS_WRITEBACK_SINGLE:
+ default:
+ break;
+ }
return 0;
}
@@ -369,6 +421,7 @@ static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
static void afs_free_request(struct netfs_io_request *rreq)
{
key_put(rreq->netfs_priv);
+ afs_put_wb_key(rreq->netfs_priv2);
}
static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
@@ -400,7 +453,10 @@ const struct netfs_request_ops afs_req_ops = {
.issue_read = afs_issue_read,
.update_i_size = afs_update_i_size,
.invalidate_cache = afs_netfs_invalidate_cache,
- .create_write_requests = afs_create_write_requests,
+ .begin_writeback = afs_begin_writeback,
+ .prepare_write = afs_prepare_write,
+ .issue_write = afs_issue_write,
+ .retry_request = afs_retry_request,
};
static void afs_add_open_mmap(struct afs_vnode *vnode)
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 3546b087e791..8418813ee043 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -49,6 +49,105 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo
return op;
}
+struct afs_io_locker {
+ struct list_head link;
+ struct task_struct *task;
+ unsigned long have_lock;
+};
+
+/*
+ * Unlock the I/O lock on a vnode.
+ */
+static void afs_unlock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker *locker;
+
+ spin_lock(&vnode->lock);
+ locker = list_first_entry_or_null(&vnode->io_lock_waiters,
+ struct afs_io_locker, link);
+ if (locker) {
+ list_del(&locker->link);
+ smp_store_release(&locker->have_lock, 1); /* The unlock barrier. */
+ smp_mb__after_atomic(); /* Store have_lock before task state */
+ wake_up_process(locker->task);
+ } else {
+ clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
+ }
+ spin_unlock(&vnode->lock);
+}
+
+/*
+ * Lock the I/O lock on a vnode uninterruptibly. We can't use an ordinary
+ * mutex as lockdep will complain if we unlock it in the wrong thread.
+ */
+static void afs_lock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, };
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock);
+ return;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters);
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (smp_load_acquire(&myself.have_lock)) /* The lock barrier */
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Lock the I/O lock on a vnode interruptibly. We can't use an ordinary mutex
+ * as lockdep will complain if we unlock it in the wrong thread.
+ */
+static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, };
+ int ret = 0;
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock);
+ return 0;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters);
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (smp_load_acquire(&myself.have_lock) || /* The lock barrier */
+ signal_pending(current))
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* If we got a signal, try to transfer the lock onto the next
+ * waiter.
+ */
+ if (unlikely(signal_pending(current))) {
+ spin_lock(&vnode->lock);
+ if (myself.have_lock) {
+ spin_unlock(&vnode->lock);
+ afs_unlock_for_io(vnode);
+ } else {
+ list_del(&myself.link);
+ spin_unlock(&vnode->lock);
+ }
+ ret = -ERESTARTSYS;
+ }
+ return ret;
+}
+
/*
* Lock the vnode(s) being operated upon.
*/
@@ -60,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_UNINTR) {
- mutex_lock(&vnode->io_lock);
+ afs_lock_for_io(vnode);
op->flags |= AFS_OPERATION_LOCK_0;
_leave(" = t [1]");
return true;
@@ -72,7 +171,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
if (vnode2 > vnode)
swap(vnode, vnode2);
- if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ if (afs_lock_for_io_interruptible(vnode) < 0) {
afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
_leave(" = f [I 0]");
@@ -81,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op)
op->flags |= AFS_OPERATION_LOCK_0;
if (vnode2) {
- if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
+ if (afs_lock_for_io_interruptible(vnode2) < 0) {
afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
op->flags &= ~AFS_OPERATION_LOCK_0;
_leave(" = f [I 1]");
return false;
@@ -104,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_LOCK_1)
- mutex_unlock(&vnode2->io_lock);
+ afs_unlock_for_io(vnode2);
if (op->flags & AFS_OPERATION_LOCK_0)
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
}
static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
@@ -157,7 +256,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
/*
* Tidy up a filesystem cursor and unlock the vnode.
*/
-static void afs_end_vnode_operation(struct afs_operation *op)
+void afs_end_vnode_operation(struct afs_operation *op)
{
_enter("");
@@ -201,7 +300,7 @@ void afs_wait_for_operation(struct afs_operation *op)
}
}
- if (op->call_responded)
+ if (op->call_responded && op->server)
set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
if (!afs_op_error(op)) {
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index 580de4adaaf6..e0030ac74ea0 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -235,20 +235,20 @@ out:
* Probe all of a fileserver's addresses to find out the best route and to
* query its capabilities.
*/
-void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
- struct afs_addr_list *new_alist, struct key *key)
+int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+ struct afs_addr_list *new_alist, struct key *key)
{
struct afs_endpoint_state *estate, *old;
- struct afs_addr_list *alist;
+ struct afs_addr_list *old_alist = NULL, *alist;
unsigned long unprobed;
_enter("%pU", &server->uuid);
estate = kzalloc(sizeof(*estate), GFP_KERNEL);
if (!estate)
- return;
+ return -ENOMEM;
- refcount_set(&estate->ref, 1);
+ refcount_set(&estate->ref, 2);
estate->server_id = server->debug_id;
estate->rtt = UINT_MAX;
@@ -256,21 +256,31 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
old = rcu_dereference_protected(server->endpoint_state,
lockdep_is_held(&server->fs_lock));
- estate->responsive_set = old->responsive_set;
- estate->addresses = afs_get_addrlist(new_alist ?: old->addresses,
- afs_alist_trace_get_estate);
+ if (old) {
+ estate->responsive_set = old->responsive_set;
+ if (!new_alist)
+ new_alist = old->addresses;
+ }
+
+ if (old_alist != new_alist)
+ afs_set_peer_appdata(server, old_alist, new_alist);
+
+ estate->addresses = afs_get_addrlist(new_alist, afs_alist_trace_get_estate);
alist = estate->addresses;
estate->probe_seq = ++server->probe_counter;
atomic_set(&estate->nr_probing, alist->nr_addrs);
+ if (new_alist)
+ server->addr_version = new_alist->version;
rcu_assign_pointer(server->endpoint_state, estate);
- set_bit(AFS_ESTATE_SUPERSEDED, &old->flags);
write_unlock(&server->fs_lock);
+ if (old)
+ set_bit(AFS_ESTATE_SUPERSEDED, &old->flags);
trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
afs_estate_trace_alloc_probe);
- afs_get_address_preferences(net, alist);
+ afs_get_address_preferences(net, new_alist);
server->probed_at = jiffies;
unprobed = (1UL << alist->nr_addrs) - 1;
@@ -293,6 +303,8 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
}
afs_put_endpoint_state(old, afs_estate_trace_put_probe);
+ afs_put_endpoint_state(estate, afs_estate_trace_put_probe);
+ return 0;
}
/*
@@ -506,10 +518,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta
finish_wait(&server->probe_wq, &wait);
dont_wait:
- if (estate->responsive_set & ~exclude)
- return 1;
if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
return 0;
+ if (estate->responsive_set & ~exclude)
+ return 1;
if (is_intr && signal_pending(current))
return -ERESTARTSYS;
if (timo == 0)
@@ -522,6 +534,6 @@ dont_wait:
*/
void afs_fs_probe_cleanup(struct afs_net *net)
{
- if (del_timer_sync(&net->fs_probe_timer))
+ if (timer_delete_sync(&net->fs_probe_timer))
afs_dec_servers_outstanding(net);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 79cd30775b7a..bc9556991d7c 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -301,18 +301,19 @@ void afs_fs_fetch_status(struct afs_operation *op)
static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
struct afs_operation *op = call->op;
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
const __be32 *bp;
+ size_t count_before;
int ret;
_enter("{%u,%zu,%zu/%llu}",
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
- req->actual_len);
+ call->remaining);
switch (call->unmarshall) {
case 0:
- req->actual_len = 0;
+ call->remaining = 0;
call->unmarshall++;
if (call->operation_ID == FSFETCHDATA64) {
afs_extract_to_tmp64(call);
@@ -322,8 +323,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
}
fallthrough;
- /* Extract the returned data length into
- * ->actual_len. This may indicate more or less data than was
+ /* Extract the returned data length into ->remaining.
+ * This may indicate more or less data than was
* requested will be returned.
*/
case 1:
@@ -332,38 +333,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
if (ret < 0)
return ret;
- req->actual_len = be64_to_cpu(call->tmp64);
- _debug("DATA length: %llu", req->actual_len);
+ call->remaining = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", call->remaining);
- if (req->actual_len == 0)
+ if (call->remaining == 0)
goto no_more_data;
- call->iter = req->iter;
- call->iov_len = min(req->actual_len, req->len);
+ call->iter = &subreq->io_iter;
+ call->iov_len = umin(call->remaining, subreq->len - subreq->transferred);
call->unmarshall++;
fallthrough;
/* extract the returned data */
case 2:
- _debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->actual_len);
+ count_before = call->iov_len;
+ _debug("extract data %zu/%llu", count_before, call->remaining);
ret = afs_extract_data(call, true);
+ subreq->transferred += count_before - call->iov_len;
+ call->remaining -= count_before - call->iov_len;
if (ret < 0)
return ret;
call->iter = &call->def_iter;
- if (req->actual_len <= req->len)
+ if (call->remaining)
goto no_more_data;
/* Discard any excess data the server gave us */
- afs_extract_discard(call, req->actual_len - req->len);
+ afs_extract_discard(call, call->remaining);
call->unmarshall = 3;
fallthrough;
case 3:
_debug("extract discard %zu/%llu",
- iov_iter_count(call->iter), req->actual_len - req->len);
+ iov_iter_count(call->iter), call->remaining);
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -385,8 +388,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
xdr_decode_AFSCallBack(&bp, call, &vp->scb);
xdr_decode_AFSVolSync(&bp, &op->volsync);
- req->data_version = vp->scb.status.data_version;
- req->file_size = vp->scb.status.size;
+ if (subreq->start + subreq->transferred >= vp->scb.status.size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
call->unmarshall++;
fallthrough;
@@ -405,14 +408,18 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
static const struct afs_call_type afs_RXFSFetchData = {
.name = "FS.FetchData",
.op = afs_FS_FetchData,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = afs_deliver_fs_fetch_data,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSFetchData64 = {
.name = "FS.FetchData64",
.op = afs_FS_FetchData64,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = afs_deliver_fs_fetch_data,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
@@ -421,8 +428,8 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
*/
static void afs_fs_fetch_data64(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
struct afs_call *call;
__be32 *bp;
@@ -432,16 +439,19 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
+ if (op->flags & AFS_OPERATION_ASYNC)
+ call->async = true;
+
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA64);
bp[1] = htonl(vp->fid.vid);
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
- bp[4] = htonl(upper_32_bits(req->pos));
- bp[5] = htonl(lower_32_bits(req->pos));
+ bp[4] = htonl(upper_32_bits(subreq->start + subreq->transferred));
+ bp[5] = htonl(lower_32_bits(subreq->start + subreq->transferred));
bp[6] = 0;
- bp[7] = htonl(lower_32_bits(req->len));
+ bp[7] = htonl(lower_32_bits(subreq->len - subreq->transferred));
call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
@@ -453,9 +463,9 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
*/
void afs_fs_fetch_data(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
- struct afs_read *req = op->fetch.req;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
@@ -467,16 +477,14 @@ void afs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- req->call_debug_id = call->debug_id;
-
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA);
bp[1] = htonl(vp->fid.vid);
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
- bp[4] = htonl(lower_32_bits(req->pos));
- bp[5] = htonl(lower_32_bits(req->len));
+ bp[4] = htonl(lower_32_bits(subreq->start + subreq->transferred));
+ bp[5] = htonl(lower_32_bits(subreq->len + subreq->transferred));
call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
@@ -1645,7 +1653,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
bp = call->request;
*bp++ = htonl(FSGIVEUPALLCALLBACKS);
- call->server = afs_use_server(server, afs_server_trace_give_up_cb);
+ call->server = afs_use_server(server, false, afs_server_trace_use_give_up_cb);
afs_make_call(call, GFP_NOFS);
afs_wait_for_call_to_complete(call);
ret = call->error;
@@ -1728,6 +1736,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
.done = afs_fileserver_probe_result,
+ .immediate_cancel = afs_fileserver_probe_result,
.destructor = afs_fs_get_capabilities_destructor,
};
@@ -1751,7 +1760,7 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
return false;
call->key = key;
- call->server = afs_use_server(server, afs_server_trace_get_caps);
+ call->server = afs_use_server(server, false, afs_server_trace_use_get_caps);
call->peer = rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer);
call->probe = afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps);
call->probe_index = addr_index;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 94fc049aff58..e9538e91f848 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -25,8 +25,94 @@
#include "internal.h"
#include "afs_fs.h"
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
+{
+ size_t size = strlen(op->create.symlink) + 1;
+ size_t dsize = 0;
+ char *p;
+
+ if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
+ mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
+ return;
+
+ vnode->directory_size = dsize;
+ p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+ memcpy(p, op->create.symlink, size);
+ kunmap_local(p);
+ set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+}
+
+static void afs_put_link(void *arg)
+{
+ struct folio *folio = virt_to_folio(arg);
+
+ kunmap_local(arg);
+ folio_put(folio);
+}
+
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct folio *folio;
+ char *content;
+ ssize_t ret;
+
+ if (!dentry) {
+ /* RCU pathwalk. */
+ if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
+ return ERR_PTR(-ECHILD);
+ goto good;
+ }
+
+ if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+ goto fetch;
+
+ ret = afs_validate(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+ test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+ goto good;
+
+fetch:
+ ret = afs_read_single(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+
+good:
+ folio = folioq_folio(vnode->directory, 0);
+ folio_get(folio);
+ content = kmap_local_folio(folio, 0);
+ set_delayed_call(callback, afs_put_link, content);
+ return content;
+}
+
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+ DEFINE_DELAYED_CALL(done);
+ const char *content;
+ int len;
+
+ content = afs_get_link(dentry, d_inode(dentry), &done);
+ if (IS_ERR(content)) {
+ do_delayed_call(&done);
+ return PTR_ERR(content);
+ }
+
+ len = umin(strlen(content), buflen);
+ if (copy_to_user(buffer, content, len))
+ len = -EFAULT;
+ do_delayed_call(&done);
+ return len;
+}
+
static const struct inode_operations afs_symlink_inode_operations = {
- .get_link = page_get_link,
+ .get_link = afs_get_link,
+ .readlink = afs_readlink,
};
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
@@ -110,7 +196,9 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
- mapping_set_large_folios(inode->i_mapping);
+ __set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
+ /* Assume locally cached directory data will be valid. */
+ __set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
break;
case AFS_FTYPE_SYMLINK:
/* Symlinks with a mode of 0644 are actually mountpoints. */
@@ -122,13 +210,13 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_mode = S_IFDIR | 0555;
inode->i_op = &afs_mntpt_inode_operations;
inode->i_fop = &afs_mntpt_file_operations;
- inode->i_mapping->a_ops = &afs_symlink_aops;
} else {
inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
- inode->i_mapping->a_ops = &afs_symlink_aops;
}
+ inode->i_mapping->a_ops = &afs_dir_aops;
inode_nohighmem(inode);
+ mapping_set_release_always(inode->i_mapping);
break;
default:
dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
@@ -140,15 +228,17 @@ static int afs_inode_init_from_status(struct afs_operation *op,
afs_set_netfs_context(vnode);
vnode->invalid_before = status->data_version;
+ trace_afs_set_dv(vnode, status->data_version);
inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
if (!vp->scb.have_cb) {
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
+ afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink);
} else {
vnode->cb_server = op->server;
- atomic64_set(&vnode->cb_expires_at, vp->scb.callback.expires_at);
+ afs_set_cb_promise(vnode, vp->scb.callback.expires_at,
+ afs_cb_promise_set_new_inode);
}
write_sequnlock(&vnode->cb_lock);
@@ -207,12 +297,17 @@ static void afs_apply_status(struct afs_operation *op,
if (vp->update_ctime)
inode_set_ctime_to_ts(inode, op->ctime);
- if (vnode->status.data_version != status->data_version)
+ if (vnode->status.data_version != status->data_version) {
+ trace_afs_set_dv(vnode, status->data_version);
data_changed = true;
+ }
vnode->status = *status;
if (vp->dv_before + vp->dv_delta != status->data_version) {
+ trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta,
+ status->data_version);
+
if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
@@ -223,12 +318,10 @@ static void afs_apply_status(struct afs_operation *op,
op->debug_id);
vnode->invalid_before = status->data_version;
- if (vnode->status.type == AFS_FTYPE_DIR) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- afs_stat_v(vnode, n_inval);
- } else {
+ if (vnode->status.type == AFS_FTYPE_DIR)
+ afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch);
+ else
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
- }
change_size = true;
data_changed = true;
unexpected_jump = true;
@@ -258,6 +351,8 @@ static void afs_apply_status(struct afs_operation *op,
inode_set_ctime_to_ts(inode, t);
inode_set_atime_to_ts(inode, t);
}
+ if (op->ops == &afs_fetch_data_operation)
+ op->fetch.subreq->rreq->i_size = status->size;
}
}
@@ -273,7 +368,7 @@ static void afs_apply_callback(struct afs_operation *op,
if (!afs_cb_is_broken(vp->cb_break_before, vnode)) {
if (op->volume->type == AFSVL_RWVOL)
vnode->cb_server = op->server;
- atomic64_set(&vnode->cb_expires_at, cb->expires_at);
+ afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb);
}
}
@@ -435,7 +530,9 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
} __packed key;
struct afs_vnode_cache_aux aux;
- if (vnode->status.type != AFS_FTYPE_FILE) {
+ if (vnode->status.type != AFS_FTYPE_FILE &&
+ vnode->status.type != AFS_FTYPE_DIR &&
+ vnode->status.type != AFS_FTYPE_SYMLINK) {
vnode->netfs.cache = NULL;
return;
}
@@ -512,7 +609,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque)
struct afs_vnode *vnode = AFS_FS_I(inode);
vnode->volume = as->volume;
- vnode->fid.vid = as->volume->vid,
+ vnode->fid.vid = as->volume->vid;
vnode->fid.vnode = 1;
vnode->fid.unique = 1;
inode->i_ino = 1;
@@ -545,7 +642,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key)
BUG_ON(!(inode->i_state & I_NEW));
vnode = AFS_FS_I(inode);
- vnode->cb_v_check = atomic_read(&as->volume->cb_v_break),
+ vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
afs_set_netfs_context(vnode);
op = afs_alloc_operation(key, as->volume);
@@ -637,6 +734,7 @@ int afs_drop_inode(struct inode *inode)
void afs_evict_inode(struct inode *inode)
{
struct afs_vnode_cache_aux aux;
+ struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
struct afs_vnode *vnode = AFS_FS_I(inode);
_enter("{%llx:%llu.%d}",
@@ -648,7 +746,22 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+ if ((S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) &&
+ (inode->i_state & I_DIRTY) &&
+ !sbi->dyn_root) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .for_sync = true,
+ .range_end = LLONG_MAX,
+ };
+
+ afs_single_writepages(inode->i_mapping, &wbc);
+ }
+
+ netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
+ netfs_free_folioq_buffer(vnode->directory);
afs_set_cache_aux(vnode, &aux);
netfs_clear_inode_writeback(inode, &aux);
@@ -694,13 +807,18 @@ static void afs_setattr_edit_file(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_vnode *vnode = vp->vnode;
+ struct inode *inode = &vnode->netfs.inode;
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
loff_t size = op->setattr.attr->ia_size;
- loff_t i_size = op->setattr.old_i_size;
+ loff_t old = op->setattr.old_i_size;
+
+ /* Note: inode->i_size was updated by afs_apply_status() inside
+ * the I/O and callback locks.
+ */
- if (size != i_size) {
- truncate_setsize(&vnode->netfs.inode, size);
+ if (size != old) {
+ truncate_pagecache(inode, size);
netfs_resize_file(&vnode->netfs, size, true);
fscache_resize_cookie(afs_vnode_cache(vnode), size);
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6ce5a612937c..1124ea4000cb 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -20,6 +20,7 @@
#include <linux/uuid.h>
#include <linux/mm_types.h>
#include <linux/dns_resolver.h>
+#include <crypto/krb5.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -130,6 +131,7 @@ struct afs_call {
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
+ struct work_struct free_work; /* Deferred free processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* security for this call */
@@ -162,6 +164,7 @@ struct afs_call {
spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
+ unsigned long long remaining; /* How much is left to receive */
unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
@@ -174,8 +177,10 @@ struct afs_call {
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
bool responded; /* Got a response from the call (may be abort) */
+ u8 security_ix; /* Security class */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
+ u32 enctype; /* Security encoding type */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
union { /* place to extract temporary data */
@@ -200,11 +205,17 @@ struct afs_call_type {
/* clean up a call */
void (*destructor)(struct afs_call *call);
+ /* Async receive processing function */
+ void (*async_rx)(struct work_struct *work);
+
/* Work function */
void (*work)(struct work_struct *work);
/* Call done function (gets called immediately on success or failure) */
void (*done)(struct afs_call *call);
+
+ /* Handle a call being immediately cancelled. */
+ void (*immediate_cancel)(struct afs_call *call);
};
/*
@@ -232,28 +243,6 @@ static inline struct key *afs_file_key(struct file *file)
}
/*
- * Record of an outstanding read operation on a vnode.
- */
-struct afs_read {
- loff_t pos; /* Where to start reading */
- loff_t len; /* How much we're asking for */
- loff_t actual_len; /* How much we're actually getting */
- loff_t file_size; /* File size returned by server */
- struct key *key; /* The key to use to reissue the read */
- struct afs_vnode *vnode; /* The file being read into. */
- struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */
- afs_dataversion_t data_version; /* Version number returned by server */
- refcount_t usage;
- unsigned int call_debug_id;
- unsigned int nr_pages;
- int error;
- void (*done)(struct afs_read *);
- void (*cleanup)(struct afs_read *);
- struct iov_iter *iter; /* Iterator representing the buffer */
- struct iov_iter def_iter; /* Default iterator */
-};
-
-/*
* AFS superblock private data
* - there's one superblock per volume
*/
@@ -295,15 +284,15 @@ struct afs_net {
struct socket *socket;
struct afs_call *spare_incoming_call;
struct work_struct charge_preallocation_work;
+ struct work_struct rx_oob_work;
struct mutex socket_mutex;
atomic_t nr_outstanding_calls;
atomic_t nr_superblocks;
/* Cell database */
struct rb_root cells;
- struct afs_cell *ws_cell;
- struct work_struct cells_manager;
- struct timer_list cells_timer;
+ struct idr cells_dyn_ino; /* cell->dynroot_ino mapping */
+ struct afs_cell __rcu *ws_cell;
atomic_t cells_outstanding;
struct rw_semaphore cells_lock;
struct mutex cells_alias_lock;
@@ -315,18 +304,12 @@ struct afs_net {
* cell, but in practice, people create aliases and subsets and there's
* no easy way to distinguish them.
*/
- seqlock_t fs_lock; /* For fs_servers, fs_probe_*, fs_proc */
- struct rb_root fs_servers; /* afs_server (by server UUID or address) */
+ seqlock_t fs_lock; /* For fs_probe_*, fs_proc */
struct list_head fs_probe_fast; /* List of afs_server to probe at 30s intervals */
struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */
struct hlist_head fs_proc; /* procfs servers list */
- struct hlist_head fs_addresses; /* afs_server (by lowest IPv6 addr) */
- seqlock_t fs_addr_lock; /* For fs_addresses[46] */
-
- struct work_struct fs_manager;
- struct timer_list fs_timer;
-
+ struct key *fs_cm_token_key; /* Key for creating CM tokens */
struct work_struct fs_prober;
struct timer_list fs_probe_timer;
atomic_t servers_outstanding;
@@ -359,13 +342,10 @@ struct afs_net {
extern const char afs_init_sysname[];
enum afs_cell_state {
- AFS_CELL_UNSET,
- AFS_CELL_ACTIVATING,
+ AFS_CELL_SETTING_UP,
AFS_CELL_ACTIVE,
- AFS_CELL_DEACTIVATING,
- AFS_CELL_INACTIVE,
- AFS_CELL_FAILED,
- AFS_CELL_REMOVED,
+ AFS_CELL_REMOVING,
+ AFS_CELL_DEAD,
};
/*
@@ -396,7 +376,9 @@ struct afs_cell {
struct afs_cell *alias_of; /* The cell this is an alias of */
struct afs_volume *root_volume; /* The root.cell volume if there is one */
struct key *anonymous_key; /* anonymous user key for this cell */
+ struct work_struct destroyer; /* Destroyer for cell */
struct work_struct manager; /* Manager for init/deinit/dns */
+ struct timer_list management_timer; /* General management timer */
struct hlist_node proc_link; /* /proc cell list link */
time64_t dns_expiry; /* Time AFSDB/SRV record expires */
time64_t last_inactive; /* Time of last drop of usage count */
@@ -412,6 +394,7 @@ struct afs_cell {
enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
unsigned int dns_lookup_count; /* Counter of DNS lookups */
unsigned int debug_id;
+ unsigned int dynroot_ino; /* Inode numbers for dynroot (a pair) */
/* The volumes belonging to this cell */
struct rw_semaphore vs_lock; /* Lock for server->volumes */
@@ -421,7 +404,7 @@ struct afs_cell {
/* Active fileserver interaction state. */
struct rb_root fs_servers; /* afs_server (by server UUID) */
- seqlock_t fs_lock; /* For fs_servers */
+ struct rw_semaphore fs_lock; /* For fs_servers */
/* VL server list. */
rwlock_t vl_servers_lock; /* Lock on vl_servers */
@@ -556,22 +539,24 @@ struct afs_server {
};
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
- struct rb_node uuid_rb; /* Link in net->fs_servers */
- struct afs_server __rcu *uuid_next; /* Next server with same UUID */
- struct afs_server *uuid_prev; /* Previous server with same UUID */
- struct list_head probe_link; /* Link in net->fs_probe_list */
- struct hlist_node addr_link; /* Link in net->fs_addresses6 */
+ struct rb_node uuid_rb; /* Link in cell->fs_servers */
+ struct list_head probe_link; /* Link in net->fs_probe_* */
struct hlist_node proc_link; /* Link in net->fs_proc */
struct list_head volumes; /* RCU list of afs_server_entry objects */
- struct afs_server *gc_next; /* Next server in manager's list */
+ struct work_struct destroyer; /* Work item to try and destroy a server */
+ struct timer_list timer; /* Management timer */
+ struct mutex cm_token_lock; /* Lock governing creation of appdata */
+ struct krb5_buffer cm_rxgk_appdata; /* Appdata to be included in RESPONSE packet */
time64_t unuse_time; /* Time at which last unused */
unsigned long flags;
#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */
#define AFS_SERVER_FL_UPDATING 1
#define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */
-#define AFS_SERVER_FL_NOT_READY 4 /* The record is not ready for use */
-#define AFS_SERVER_FL_NOT_FOUND 5 /* VL server says no such server */
-#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */
+#define AFS_SERVER_FL_UNCREATED 3 /* The record needs creating */
+#define AFS_SERVER_FL_CREATING 4 /* The record is being created */
+#define AFS_SERVER_FL_EXPIRED 5 /* The record has expired */
+#define AFS_SERVER_FL_NOT_FOUND 6 /* VL server says no such server */
+#define AFS_SERVER_FL_VL_FAIL 7 /* Failed to access VL server */
#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
#define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */
#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */
@@ -581,6 +566,7 @@ struct afs_server {
atomic_t active; /* Active user count */
u32 addr_version; /* Address list version */
u16 service_id; /* Service ID we're using. */
+ short create_error; /* Creation error */
unsigned int rtt; /* Server's current RTT in uS */
unsigned int debug_id; /* Debugging ID for traces */
@@ -635,6 +621,7 @@ struct afs_volume {
afs_volid_t vid; /* The volume ID of this volume */
afs_volid_t vids[AFS_MAXTYPES]; /* All associated volume IDs */
refcount_t ref;
+ unsigned int debug_id; /* Debugging ID for traces */
time64_t update_at; /* Time at which to next update */
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
struct rb_node cell_node; /* Link in cell->volumes */
@@ -701,24 +688,26 @@ struct afs_vnode {
struct afs_file_status status; /* AFS status info for this file */
afs_dataversion_t invalid_before; /* Child dentries are invalid before this */
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
- struct mutex io_lock; /* Lock for serialising I/O on this mutex */
+ struct list_head io_lock_waiters; /* Threads waiting for the I/O lock */
struct rw_semaphore validate_lock; /* lock for validating this vnode */
struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */
struct key *silly_key; /* Silly rename key */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
+#define AFS_VNODE_IO_LOCK 0 /* Set if the I/O serialisation lock is held */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
-#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
+#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
+ struct folio_queue *directory; /* Directory contents */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
@@ -727,6 +716,7 @@ struct afs_vnode {
ktime_t locked_at; /* Time at which lock obtained */
enum afs_lock_state lock_state : 8;
afs_lock_type_t lock_type : 8;
+ unsigned int directory_size; /* Amount of space in ->directory */
/* outstanding callback notification on this file */
struct work_struct cb_work; /* Work for mmap'd files */
@@ -906,7 +896,7 @@ struct afs_operation {
bool new_negative;
} rename;
struct {
- struct afs_read *req;
+ struct netfs_io_subrequest *subreq;
} fetch;
struct {
afs_lock_type_t type;
@@ -916,7 +906,6 @@ struct afs_operation {
loff_t pos;
loff_t size;
loff_t i_size;
- bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
@@ -959,6 +948,7 @@ struct afs_operation {
#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */
+#define AFS_OPERATION_ASYNC 0x2000 /* Set if should run asynchronously */
};
/*
@@ -983,6 +973,21 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
i_size_read(&vnode->netfs.inode), flags);
}
+/*
+ * Directory iteration management.
+ */
+struct afs_dir_iter {
+ struct afs_vnode *dvnode;
+ union afs_xdr_dir_block *block;
+ struct folio_queue *fq;
+ unsigned int fpos;
+ int fq_slot;
+ unsigned int loop_check;
+ u8 nr_slots;
+ u8 bucket;
+ unsigned int prev_entry;
+};
+
#include <trace/events/afs.h>
/*****************************************************************************/
@@ -1003,6 +1008,9 @@ extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr,
__be32 xdr, u16 port);
extern int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *addr,
__be32 *xdr, u16 port);
+void afs_set_peer_appdata(struct afs_server *server,
+ struct afs_addr_list *old_alist,
+ struct afs_addr_list *new_alist);
/*
* addr_prefs.c
@@ -1039,16 +1047,17 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
extern int afs_cell_init(struct afs_net *, const char *);
extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned,
enum afs_cell_trace);
-extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
- const char *, bool);
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ const char *vllist, bool excl,
+ enum afs_cell_trace trace);
extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace);
-extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace);
+void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason);
extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace);
-extern void afs_manage_cells(struct work_struct *);
-extern void afs_cells_timer(struct timer_list *);
+void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs);
extern void __net_exit afs_cell_purge(struct afs_net *);
/*
@@ -1057,6 +1066,19 @@ extern void __net_exit afs_cell_purge(struct afs_net *);
extern bool afs_cm_incoming_call(struct afs_call *);
/*
+ * cm_security.c
+ */
+void afs_process_oob_queue(struct work_struct *work);
+#ifdef CONFIG_RXGK
+int afs_create_token_key(struct afs_net *net, struct socket *socket);
+#else
+static inline int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+ return 0;
+}
+#endif
+
+/*
* dir.c
*/
extern const struct file_operations afs_dir_file_operations;
@@ -1064,8 +1086,13 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ __acquires(&dvnode->validate_lock);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
+int afs_single_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
/*
* dir_edit.c
@@ -1073,6 +1100,20 @@ extern void afs_check_for_remote_deletion(struct afs_operation *);
extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
enum afs_edit_dir_reason);
extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
+ enum afs_edit_dir_reason why);
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode);
+
+/*
+ * dir_search.c
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name);
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name);
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block);
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+ struct afs_fid *_fid);
+int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
+ struct afs_fid *_fid, afs_dataversion_t *_dir_version);
/*
* dir_silly.c
@@ -1087,34 +1128,23 @@ extern int afs_silly_iput(struct dentry *, struct inode *);
extern const struct inode_operations afs_dynroot_inode_operations;
extern const struct dentry_operations afs_dynroot_dentry_operations;
-extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *);
-extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *);
-extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *);
-extern int afs_dynroot_populate(struct super_block *);
-extern void afs_dynroot_depopulate(struct super_block *);
+struct inode *afs_dynroot_iget_root(struct super_block *sb);
/*
* file.c
*/
extern const struct address_space_operations afs_file_aops;
-extern const struct address_space_operations afs_symlink_aops;
extern const struct inode_operations afs_file_inode_operations;
extern const struct file_operations afs_file_operations;
+extern const struct afs_operation_ops afs_fetch_data_operation;
extern const struct netfs_request_ops afs_req_ops;
extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
extern void afs_put_wb_key(struct afs_wb_key *);
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
-extern struct afs_read *afs_alloc_read(gfp_t);
-extern void afs_put_read(struct afs_read *);
-
-static inline struct afs_read *afs_get_read(struct afs_read *req)
-{
- refcount_inc(&req->usage);
- return req;
-}
+void afs_fetch_data_async_rx(struct work_struct *work);
+void afs_fetch_data_immediate_cancel(struct afs_call *call);
/*
* flock.c
@@ -1166,6 +1196,7 @@ extern void afs_fs_store_acl(struct afs_operation *);
extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *);
extern int afs_put_operation(struct afs_operation *);
extern bool afs_begin_vnode_operation(struct afs_operation *);
+extern void afs_end_vnode_operation(struct afs_operation *op);
extern void afs_wait_for_operation(struct afs_operation *);
extern int afs_do_sync_operation(struct afs_operation *);
@@ -1189,8 +1220,8 @@ struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *est
enum afs_estate_trace where);
void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where);
extern void afs_fileserver_probe_result(struct afs_call *);
-void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
- struct afs_addr_list *new_addrs, struct key *key);
+int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+ struct afs_addr_list *new_alist, struct key *key);
int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr);
extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
extern void afs_fs_probe_dispatcher(struct work_struct *);
@@ -1203,10 +1234,13 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
*/
extern const struct afs_operation_ops afs_fetch_status_operation;
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback);
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
-extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
extern struct inode *afs_root_iget(struct super_block *, struct key *);
extern int afs_getattr(struct mnt_idmap *idmap, const struct path *,
@@ -1332,7 +1366,9 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
+void afs_deferred_put_call(struct afs_call *call);
void afs_make_call(struct afs_call *call, gfp_t gfp);
+void afs_deliver_to_call(struct afs_call *call);
void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
@@ -1343,6 +1379,28 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
extern int afs_extract_data(struct afs_call *, bool);
extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
+static inline struct afs_call *afs_get_call(struct afs_call *call,
+ enum afs_call_trace why)
+{
+ int r;
+
+ __refcount_inc(&call->ref, &r);
+
+ trace_afs_call(call->debug_id, why, r + 1,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+ return call;
+}
+
+static inline void afs_see_call(struct afs_call *call, enum afs_call_trace why)
+{
+ int r = refcount_read(&call->ref);
+
+ trace_afs_call(call->debug_id, why, r,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+}
+
static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
gfp_t gfp)
{
@@ -1464,20 +1522,30 @@ extern void __exit afs_clean_up_permit_cache(void);
*/
extern spinlock_t afs_server_peer_lock;
-extern struct afs_server *afs_find_server(struct afs_net *, const struct rxrpc_peer *);
-extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
+struct afs_server *afs_find_server(const struct rxrpc_peer *peer);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32);
extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
-extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace);
-extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
-extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace);
+struct afs_server *afs_use_server(struct afs_server *server, bool activate,
+ enum afs_server_trace reason);
+void afs_unuse_server(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason);
+void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason);
extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
-extern void afs_manage_servers(struct work_struct *);
-extern void afs_servers_timer(struct timer_list *);
+void afs_purge_servers(struct afs_cell *cell);
extern void afs_fs_probe_timer(struct timer_list *);
-extern void __net_exit afs_purge_servers(struct afs_net *);
+void __net_exit afs_wait_for_servers(struct afs_net *net);
bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key);
+static inline void afs_see_server(struct afs_server *server, enum afs_server_trace trace)
+{
+ int r = refcount_read(&server->ref);
+ int a = atomic_read(&server->active);
+
+ trace_afs_server(server->debug_id, r, a, trace);
+
+}
+
static inline void afs_inc_servers_outstanding(struct afs_net *net)
{
atomic_inc(&net->servers_outstanding);
@@ -1599,11 +1667,14 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
/*
* write.c
*/
+void afs_prepare_write(struct netfs_io_subrequest *subreq);
+void afs_issue_write(struct netfs_io_subrequest *subreq);
+void afs_begin_writeback(struct netfs_io_request *wreq);
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream);
extern int afs_writepages(struct address_space *, struct writeback_control *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
extern void afs_prune_wb_keys(struct afs_vnode *);
-void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len);
/*
* xattr.c
@@ -1706,6 +1777,38 @@ static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
return -EIO;
}
+/*
+ * Set the callback promise on a vnode.
+ */
+static inline void afs_set_cb_promise(struct afs_vnode *vnode, time64_t expires_at,
+ enum afs_cb_promise_trace trace)
+{
+ atomic64_set(&vnode->cb_expires_at, expires_at);
+ trace_afs_cb_promise(vnode, trace);
+}
+
+/*
+ * Clear the callback promise on a vnode, returning true if it was promised.
+ */
+static inline bool afs_clear_cb_promise(struct afs_vnode *vnode,
+ enum afs_cb_promise_trace trace)
+{
+ trace_afs_cb_promise(vnode, trace);
+ return atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE;
+}
+
+/*
+ * Mark a directory as being invalid.
+ */
+static inline void afs_invalidate_dir(struct afs_vnode *dvnode,
+ enum afs_dir_invalid_trace trace)
+{
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ trace_afs_dir_invalid(dvnode, trace);
+ afs_stat_v(dvnode, n_inval);
+ }
+}
+
/*****************************************************************************/
/*
* debug tracing
diff --git a/fs/afs/main.c b/fs/afs/main.c
index a14f6013e316..02475d415d88 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -73,28 +73,21 @@ static int __net_init afs_net_init(struct net *net_ns)
generate_random_uuid((unsigned char *)&net->uuid);
INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+ INIT_WORK(&net->rx_oob_work, afs_process_oob_queue);
mutex_init(&net->socket_mutex);
net->cells = RB_ROOT;
+ idr_init(&net->cells_dyn_ino);
init_rwsem(&net->cells_lock);
- INIT_WORK(&net->cells_manager, afs_manage_cells);
- timer_setup(&net->cells_timer, afs_cells_timer, 0);
-
mutex_init(&net->cells_alias_lock);
mutex_init(&net->proc_cells_lock);
INIT_HLIST_HEAD(&net->proc_cells);
seqlock_init(&net->fs_lock);
- net->fs_servers = RB_ROOT;
INIT_LIST_HEAD(&net->fs_probe_fast);
INIT_LIST_HEAD(&net->fs_probe_slow);
INIT_HLIST_HEAD(&net->fs_proc);
- INIT_HLIST_HEAD(&net->fs_addresses);
- seqlock_init(&net->fs_addr_lock);
-
- INIT_WORK(&net->fs_manager, afs_manage_servers);
- timer_setup(&net->fs_timer, afs_servers_timer, 0);
INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher);
timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0);
atomic_set(&net->servers_outstanding, 1);
@@ -130,13 +123,14 @@ error_open_socket:
net->live = false;
afs_fs_probe_cleanup(net);
afs_cell_purge(net);
- afs_purge_servers(net);
+ afs_wait_for_servers(net);
error_cell_init:
net->live = false;
afs_proc_cleanup(net);
error_proc:
afs_put_sysnames(net->sysnames);
error_sysnames:
+ idr_destroy(&net->cells_dyn_ino);
net->live = false;
return ret;
}
@@ -151,10 +145,11 @@ static void __net_exit afs_net_exit(struct net *net_ns)
net->live = false;
afs_fs_probe_cleanup(net);
afs_cell_purge(net);
- afs_purge_servers(net);
+ afs_wait_for_servers(net);
afs_close_socket(net);
afs_proc_cleanup(net);
afs_put_sysnames(net->sysnames);
+ idr_destroy(&net->cells_dyn_ino);
kfree_rcu(rcu_access_pointer(net->address_prefs), rcu);
}
@@ -177,7 +172,7 @@ static int __init afs_init(void)
afs_wq = alloc_workqueue("afs", 0, 0);
if (!afs_wq)
goto error_afs_wq;
- afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!afs_async_calls)
goto error_async;
afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index b8180bf2281f..8f2b3a177690 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
+#include <crypto/krb5.h>
#include "internal.h"
#include "afs_fs.h"
#include "protocol_uae.h"
@@ -103,6 +104,32 @@ int afs_abort_to_error(u32 abort_code)
case RXKADDATALEN: return -EKEYREJECTED;
case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+ case RXGK_INCONSISTENCY: return -EPROTO;
+ case RXGK_PACKETSHORT: return -EPROTO;
+ case RXGK_BADCHALLENGE: return -EPROTO;
+ case RXGK_SEALEDINCON: return -EKEYREJECTED;
+ case RXGK_NOTAUTH: return -EKEYREJECTED;
+ case RXGK_EXPIRED: return -EKEYEXPIRED;
+ case RXGK_BADLEVEL: return -EKEYREJECTED;
+ case RXGK_BADKEYNO: return -EKEYREJECTED;
+ case RXGK_NOTRXGK: return -EKEYREJECTED;
+ case RXGK_UNSUPPORTED: return -EKEYREJECTED;
+ case RXGK_GSSERROR: return -EKEYREJECTED;
+#ifdef RXGK_BADETYPE
+ case RXGK_BADETYPE: return -ENOPKG;
+#endif
+#ifdef RXGK_BADTOKEN
+ case RXGK_BADTOKEN: return -EKEYREJECTED;
+#endif
+#ifdef RXGK_BADETYPE
+ case RXGK_DATALEN: return -EPROTO;
+#endif
+#ifdef RXGK_BADQOP
+ case RXGK_BADQOP: return -EKEYREJECTED;
+#endif
+
+ case KRB5_PROG_KEYTYPE_NOSUPP: return -ENOPKG;
+
case RXGEN_OPCODE: return -ENOTSUPP;
default: return -EREMOTEIO;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 97f50e9fd9eb..9434a5399f2b 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -30,7 +30,7 @@ const struct file_operations afs_mntpt_file_operations = {
const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
- .readlink = page_readlink,
+ .readlink = afs_readlink,
.getattr = afs_getattr,
};
@@ -87,7 +87,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ctx->force = true;
}
if (ctx->cell) {
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_mntpt);
ctx->cell = NULL;
}
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
@@ -107,7 +107,8 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (size > AFS_MAXCELLNAME)
return -ENAMETOOLONG;
- cell = afs_lookup_cell(ctx->net, p, size, NULL, false);
+ cell = afs_lookup_cell(ctx->net, p, size, NULL, false,
+ afs_cell_trace_use_lookup_mntpt);
if (IS_ERR(cell)) {
pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
return PTR_ERR(cell);
@@ -118,9 +119,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ctx->volnamesz = sizeof(afs_root_volume) - 1;
} else {
/* read the contents of the AFS special symlink */
- struct page *page;
+ DEFINE_DELAYED_CALL(cleanup);
+ const char *content;
loff_t size = i_size_read(d_inode(mntpt));
- char *buf;
if (src_as->cell)
ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
@@ -128,18 +129,23 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (size < 2 || size > PAGE_SIZE - 1)
return -EINVAL;
- page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ content = afs_get_link(mntpt, d_inode(mntpt), &cleanup);
+ if (IS_ERR(content)) {
+ do_delayed_call(&cleanup);
+ return PTR_ERR(content);
+ }
- buf = kmap(page);
ret = -EINVAL;
- if (buf[size - 1] == '.')
- ret = vfs_parse_fs_string(fc, "source", buf, size - 1);
- kunmap(page);
- put_page(page);
+ if (content[size - 1] == '.')
+ ret = vfs_parse_fs_string(fc, "source", content, size - 1);
+ do_delayed_call(&cleanup);
if (ret < 0)
return ret;
+
+ /* Don't cross a backup volume mountpoint from a backup volume */
+ if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+ ctx->type == AFSVL_BACKVOL)
+ return -ENODEV;
}
return 0;
@@ -183,7 +189,6 @@ struct vfsmount *afs_d_automount(struct path *path)
if (IS_ERR(newmnt))
return newmnt;
- mntget(newmnt); /* prevent immediate expiration */
mnt_set_expiry(newmnt, &afs_vfsmounts);
queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
afs_mntpt_expiry_timeout * HZ);
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 15eab053af6d..40e879c8ca77 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -122,14 +122,15 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
if (strcmp(buf, "add") == 0) {
struct afs_cell *cell;
- cell = afs_lookup_cell(net, name, strlen(name), args, true);
+ cell = afs_lookup_cell(net, name, strlen(name), args, true,
+ afs_cell_trace_use_lookup_add);
if (IS_ERR(cell)) {
ret = PTR_ERR(cell);
goto done;
}
if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
- afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin);
+ afs_unuse_cell(cell, afs_cell_trace_unuse_no_pin);
} else {
goto inval;
}
@@ -206,7 +207,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v)
net = afs_seq2net_single(m);
down_read(&net->cells_lock);
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
if (cell)
seq_printf(m, "%s\n", cell->name);
up_read(&net->cells_lock);
@@ -240,7 +241,13 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size)
/* determine command to perform */
_debug("rootcell=%s", buf);
- ret = afs_cell_init(net, buf);
+ ret = -EEXIST;
+ inode_lock(file_inode(file));
+ if (!rcu_access_pointer(net->ws_cell))
+ ret = afs_cell_init(net, buf);
+ else
+ printk("busy\n");
+ inode_unlock(file_inode(file));
out:
_leave(" = %d", ret);
@@ -437,8 +444,6 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
}
server = list_entry(v, struct afs_server, proc_link);
- estate = rcu_dereference(server->endpoint_state);
- alist = estate->addresses;
seq_printf(m, "%pU %3d %3d %s\n",
&server->uuid,
refcount_read(&server->ref),
@@ -448,10 +453,16 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
server->flags, server->rtt);
seq_printf(m, " - probe: last=%d\n",
(int)(jiffies - server->probed_at) / HZ);
+
+ estate = rcu_dereference(server->endpoint_state);
+ if (!estate)
+ goto out;
failed = estate->failed_set;
seq_printf(m, " - ESTATE pq=%x np=%u rsp=%lx f=%lx\n",
estate->probe_seq, atomic_read(&estate->nr_probing),
estate->responsive_set, estate->failed_set);
+
+ alist = estate->addresses;
seq_printf(m, " - ALIST v=%u ap=%u\n",
alist->version, alist->addr_pref_version);
for (i = 0; i < alist->nr_addrs; i++) {
@@ -464,6 +475,8 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
rxrpc_kernel_get_srtt(addr->peer),
addr->last_error, addr->prio);
}
+
+out:
return 0;
}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index ed04bd1eeae8..a1c24f589d9e 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -99,7 +99,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
write_seqlock(&vnode->cb_lock);
ASSERTCMP(cb_server, ==, vnode->cb_server);
vnode->cb_server = NULL;
- if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
+ if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server))
vnode->cb_break++;
write_sequnlock(&vnode->cb_lock);
}
@@ -541,11 +541,13 @@ pick_server:
test_bit(AFS_SE_EXCLUDED, &se->flags) ||
!test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
continue;
- es = op->server_states->endpoint_state;
+ es = op->server_states[i].endpoint_state;
sal = es->addresses;
afs_get_address_preferences_rcu(op->net, sal);
for (j = 0; j < sal->nr_addrs; j++) {
+ if (es->failed_set & (1 << j))
+ continue;
if (!sal->addrs[j].peer)
continue;
if (sal->addrs[j].prio > best_prio) {
@@ -581,7 +583,7 @@ selected_server:
if (vnode->cb_server != server) {
vnode->cb_server = server;
vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change);
}
retry_server:
@@ -605,6 +607,8 @@ iterate_address:
best_prio = -1;
addr_index = 0;
for (i = 0; i < alist->nr_addrs; i++) {
+ if (!(set & (1 << i)))
+ continue;
if (alist->addrs[i].prio > best_prio) {
addr_index = i;
best_prio = alist->addrs[i].prio;
@@ -628,8 +632,10 @@ iterate_address:
wait_for_more_probe_results:
error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
!(op->flags & AFS_OPERATION_UNINTR));
- if (!error)
+ if (error == 1)
goto iterate_address;
+ if (!error)
+ goto restart_from_beginning;
/* We've now had a failure to respond on all of a server's addresses -
* immediately probe them again and consider retrying the server.
@@ -640,10 +646,13 @@ wait_for_more_probe_results:
error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
!(op->flags & AFS_OPERATION_UNINTR));
switch (error) {
- case 0:
+ case 1:
op->flags &= ~AFS_OPERATION_RETRY_SERVER;
- trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
+ trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
goto retry_server;
+ case 0:
+ trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
+ goto restart_from_beginning;
case -ERESTARTSYS:
afs_op_set_error(op, error);
goto failed;
@@ -674,7 +683,7 @@ no_more_servers:
for (i = 0; i < op->server_list->nr_servers; i++) {
struct afs_endpoint_state *estate;
- estate = op->server_states->endpoint_state;
+ estate = op->server_states[i].endpoint_state;
error = READ_ONCE(estate->error);
if (error < 0)
afs_op_accumulate_error(op, error, estate->abort_code);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index c453428f3c8b..c1cadf8fb346 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,13 +18,23 @@
struct workqueue_struct *afs_async_calls;
+static void afs_deferred_free_worker(struct work_struct *work);
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID);
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob);
static int afs_deliver_cm_op_id(struct afs_call *);
+static const struct rxrpc_kernel_ops afs_rxrpc_callback_ops = {
+ .notify_new_call = afs_rx_new_call,
+ .discard_new_call = afs_rx_discard_new_call,
+ .user_attach_call = afs_rx_attach,
+ .notify_oob = afs_rx_notify_oob,
+};
+
/* asynchronous incoming call initial processing */
static const struct afs_call_type afs_RXCMxxxx = {
.name = "CB.xxxx",
@@ -48,6 +58,7 @@ int afs_open_socket(struct afs_net *net)
goto error_1;
socket->sk->sk_allocation = GFP_NOFS;
+ socket->sk->sk_user_data = net;
/* bind the callback manager's address to make this a server socket */
memset(&srx, 0, sizeof(srx));
@@ -63,6 +74,14 @@ int afs_open_socket(struct afs_net *net)
if (ret < 0)
goto error_2;
+ ret = rxrpc_sock_set_manage_response(socket->sk, true);
+ if (ret < 0)
+ goto error_2;
+
+ ret = afs_create_token_key(net, socket);
+ if (ret < 0)
+ pr_err("Couldn't create RxGK CM key: %d\n", ret);
+
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
if (ret == -EADDRINUSE) {
srx.transport.sin6.sin6_port = 0;
@@ -83,8 +102,7 @@ int afs_open_socket(struct afs_net *net)
* it sends back to us.
*/
- rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
- afs_rx_discard_new_call);
+ rxrpc_kernel_set_notifications(socket, &afs_rxrpc_callback_ops);
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
@@ -124,7 +142,9 @@ void afs_close_socket(struct afs_net *net)
kernel_sock_shutdown(net->socket, SHUT_RDWR);
flush_workqueue(afs_async_calls);
+ net->socket->sk->sk_user_data = NULL;
sock_release(net->socket);
+ key_put(net->fs_cm_token_key);
_debug("dework");
_leave("");
@@ -148,7 +168,9 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->net = net;
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
refcount_set(&call->ref, 1);
- INIT_WORK(&call->async_work, afs_process_async_call);
+ INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call);
+ INIT_WORK(&call->work, call->type->work);
+ INIT_WORK(&call->free_work, afs_deferred_free_worker);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
call->iter = &call->def_iter;
@@ -159,6 +181,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
return call;
}
+static void afs_free_call(struct afs_call *call)
+{
+ struct afs_net *net = call->net;
+ int o;
+
+ ASSERT(!work_pending(&call->async_work));
+
+ rxrpc_kernel_put_peer(call->peer);
+
+ if (call->rxcall) {
+ rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
+ rxrpc_kernel_put_call(net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->type->destructor)
+ call->type->destructor(call);
+
+ afs_unuse_server_notime(call->net, call->server, afs_server_trace_unuse_call);
+ kfree(call->request);
+
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
+ __builtin_return_address(0));
+ kfree(call);
+
+ o = atomic_dec_return(&net->nr_outstanding_calls);
+ if (o == 0)
+ wake_up_var(&net->nr_outstanding_calls);
+}
+
/*
* Dispose of a reference on a call.
*/
@@ -173,45 +225,34 @@ void afs_put_call(struct afs_call *call)
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
+ if (zero)
+ afs_free_call(call);
+}
- if (zero) {
- ASSERT(!work_pending(&call->async_work));
- ASSERT(call->type->name != NULL);
-
- rxrpc_kernel_put_peer(call->peer);
-
- if (call->rxcall) {
- rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
- rxrpc_kernel_put_call(net->socket, call->rxcall);
- call->rxcall = NULL;
- }
- if (call->type->destructor)
- call->type->destructor(call);
-
- afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
- kfree(call->request);
-
- trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
- __builtin_return_address(0));
- kfree(call);
+static void afs_deferred_free_worker(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, free_work);
- o = atomic_dec_return(&net->nr_outstanding_calls);
- if (o == 0)
- wake_up_var(&net->nr_outstanding_calls);
- }
+ afs_free_call(call);
}
-static struct afs_call *afs_get_call(struct afs_call *call,
- enum afs_call_trace why)
+/*
+ * Dispose of a reference on a call, deferring the cleanup to a workqueue
+ * to avoid lock recursion.
+ */
+void afs_deferred_put_call(struct afs_call *call)
{
- int r;
-
- __refcount_inc(&call->ref, &r);
+ struct afs_net *net = call->net;
+ unsigned int debug_id = call->debug_id;
+ bool zero;
+ int r, o;
- trace_afs_call(call->debug_id, why, r + 1,
- atomic_read(&call->net->nr_outstanding_calls),
+ zero = __refcount_dec_and_test(&call->ref, &r);
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
- return call;
+ if (zero)
+ schedule_work(&call->free_work);
}
/*
@@ -220,8 +261,6 @@ static struct afs_call *afs_get_call(struct afs_call *call,
static void afs_queue_call_work(struct afs_call *call)
{
if (call->type->work) {
- INIT_WORK(&call->work, call->type->work);
-
afs_get_call(call, afs_call_trace_work);
if (!queue_work(afs_wq, &call->work))
afs_put_call(call);
@@ -396,11 +435,16 @@ void afs_make_call(struct afs_call *call, gfp_t gfp)
return;
error_do_abort:
- if (ret != -ECONNABORTED) {
+ if (ret != -ECONNABORTED)
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret,
afs_abort_send_data_error);
- } else {
+ if (call->async) {
+ afs_see_call(call, afs_call_trace_async_abort);
+ return;
+ }
+
+ if (ret == -ECONNABORTED) {
len = 0;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
rxrpc_kernel_recv_data(call->net->socket, rxcall,
@@ -411,8 +455,10 @@ error_do_abort:
call->error = ret;
trace_afs_call_done(call);
error_kill_call:
- if (call->type->done)
- call->type->done(call);
+ if (call->async)
+ afs_see_call(call, afs_call_trace_async_kill);
+ if (call->type->immediate_cancel)
+ call->type->immediate_cancel(call);
/* We need to dispose of the extra ref we grabbed for an async call.
* The call, however, might be queued on afs_async_calls and we need to
@@ -467,7 +513,7 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort)
/*
* deliver messages to a call
*/
-static void afs_deliver_to_call(struct afs_call *call)
+void afs_deliver_to_call(struct afs_call *call)
{
enum afs_call_state state;
size_t len;
@@ -568,7 +614,6 @@ local_abort:
abort_code = 0;
call_complete:
afs_set_call_complete(call, ret, remote_abort);
- state = AFS_CALL_COMPLETE;
goto done;
}
@@ -640,7 +685,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
- * wake up an asynchronous call
+ * Wake up an asynchronous call. The caller is holding the call notify
+ * spinlock around this, so we can't call afs_put_call().
*/
static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long call_user_ID)
@@ -657,7 +703,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
__builtin_return_address(0));
if (!queue_work(afs_async_calls, &call->async_work))
- afs_put_call(call);
+ afs_deferred_put_call(call);
}
}
@@ -711,7 +757,6 @@ void afs_charge_preallocation(struct work_struct *work)
if (rxrpc_kernel_charge_accept(net->socket,
afs_wake_up_async_call,
- afs_rx_attach,
(unsigned long)call,
GFP_KERNEL,
call->debug_id) < 0)
@@ -739,8 +784,14 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long user_call_ID)
{
+ struct afs_call *call = (struct afs_call *)user_call_ID;
struct afs_net *net = afs_sock2net(sk);
+ call->peer = rxrpc_kernel_get_call_peer(sk->sk_socket, call->rxcall);
+ call->server = afs_find_server(call->peer);
+ if (!call->server)
+ trace_afs_cm_no_server(call, rxrpc_kernel_remote_srx(call->peer));
+
queue_work(afs_wq, &net->charge_preallocation_work);
}
@@ -767,9 +818,14 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
if (!afs_cm_incoming_call(call))
return -ENOTSUPP;
+ call->security_ix = rxrpc_kernel_query_call_security(call->rxcall,
+ &call->service_id,
+ &call->enctype);
+
trace_afs_cb_call(call);
+ call->work.func = call->type->work;
- /* pass responsibility for the remainer of this message off to the
+ /* pass responsibility for the remainder of this message off to the
* cache manager op */
return call->type->deliver(call);
}
@@ -918,3 +974,13 @@ noinline int afs_protocol_error(struct afs_call *call,
call->unmarshalling_error = true;
return -EBADMSG;
}
+
+/*
+ * Wake up OOB notification processing.
+ */
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob)
+{
+ struct afs_net *net = sk->sk_user_data;
+
+ schedule_work(&net->rx_oob_work);
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 038f9d0ae3af..a97562f831eb 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -14,188 +14,104 @@
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static atomic_t afs_server_debug_id;
-static struct afs_server *afs_maybe_use_server(struct afs_server *,
- enum afs_server_trace);
static void __afs_put_server(struct afs_net *, struct afs_server *);
+static void afs_server_timer(struct timer_list *timer);
+static void afs_server_destroyer(struct work_struct *work);
/*
* Find a server by one of its addresses.
*/
-struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer *peer)
+struct afs_server *afs_find_server(const struct rxrpc_peer *peer)
{
- const struct afs_endpoint_state *estate;
- const struct afs_addr_list *alist;
- struct afs_server *server = NULL;
- unsigned int i;
- int seq = 1;
+ struct afs_server *server = (struct afs_server *)rxrpc_kernel_get_peer_data(peer);
- rcu_read_lock();
-
- do {
- if (server)
- afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
- server = NULL;
- seq++; /* 2 on the 1st/lockless path, otherwise odd */
- read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
-
- hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) {
- estate = rcu_dereference(server->endpoint_state);
- alist = estate->addresses;
- for (i = 0; i < alist->nr_addrs; i++)
- if (alist->addrs[i].peer == peer)
- goto found;
- }
-
- server = NULL;
- continue;
- found:
- server = afs_maybe_use_server(server, afs_server_trace_get_by_addr);
-
- } while (need_seqretry(&net->fs_addr_lock, seq));
-
- done_seqretry(&net->fs_addr_lock, seq);
-
- rcu_read_unlock();
- return server;
+ if (!server)
+ return NULL;
+ return afs_use_server(server, false, afs_server_trace_use_cm_call);
}
/*
- * Look up a server by its UUID and mark it active.
+ * Look up a server by its UUID and mark it active. The caller must hold
+ * cell->fs_lock.
*/
-struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
+static struct afs_server *afs_find_server_by_uuid(struct afs_cell *cell, const uuid_t *uuid)
{
- struct afs_server *server = NULL;
+ struct afs_server *server;
struct rb_node *p;
- int diff, seq = 1;
+ int diff;
_enter("%pU", uuid);
- do {
- /* Unfortunately, rbtree walking doesn't give reliable results
- * under just the RCU read lock, so we have to check for
- * changes.
- */
- if (server)
- afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
- server = NULL;
- seq++; /* 2 on the 1st/lockless path, otherwise odd */
- read_seqbegin_or_lock(&net->fs_lock, &seq);
-
- p = net->fs_servers.rb_node;
- while (p) {
- server = rb_entry(p, struct afs_server, uuid_rb);
-
- diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
- if (diff < 0) {
- p = p->rb_left;
- } else if (diff > 0) {
- p = p->rb_right;
- } else {
- afs_use_server(server, afs_server_trace_get_by_uuid);
- break;
- }
-
- server = NULL;
- }
- } while (need_seqretry(&net->fs_lock, seq));
+ p = cell->fs_servers.rb_node;
+ while (p) {
+ server = rb_entry(p, struct afs_server, uuid_rb);
- done_seqretry(&net->fs_lock, seq);
+ diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
+ if (diff < 0) {
+ p = p->rb_left;
+ } else if (diff > 0) {
+ p = p->rb_right;
+ } else {
+ if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags))
+ return NULL; /* Need a write lock */
+ afs_use_server(server, true, afs_server_trace_use_by_uuid);
+ return server;
+ }
+ }
- _leave(" = %p", server);
- return server;
+ return NULL;
}
/*
- * Install a server record in the namespace tree. If there's a clash, we stick
- * it into a list anchored on whichever afs_server struct is actually in the
- * tree.
+ * Install a server record in the cell tree. The caller must hold an exclusive
+ * lock on cell->fs_lock.
*/
static struct afs_server *afs_install_server(struct afs_cell *cell,
- struct afs_server *candidate)
+ struct afs_server **candidate)
{
- const struct afs_endpoint_state *estate;
- const struct afs_addr_list *alist;
- struct afs_server *server, *next;
+ struct afs_server *server;
struct afs_net *net = cell->net;
struct rb_node **pp, *p;
int diff;
_enter("%p", candidate);
- write_seqlock(&net->fs_lock);
-
/* Firstly install the server in the UUID lookup tree */
- pp = &net->fs_servers.rb_node;
+ pp = &cell->fs_servers.rb_node;
p = NULL;
while (*pp) {
p = *pp;
_debug("- consider %p", p);
server = rb_entry(p, struct afs_server, uuid_rb);
- diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
- if (diff < 0) {
+ diff = memcmp(&(*candidate)->uuid, &server->uuid, sizeof(uuid_t));
+ if (diff < 0)
pp = &(*pp)->rb_left;
- } else if (diff > 0) {
+ else if (diff > 0)
pp = &(*pp)->rb_right;
- } else {
- if (server->cell == cell)
- goto exists;
-
- /* We have the same UUID representing servers in
- * different cells. Append the new server to the list.
- */
- for (;;) {
- next = rcu_dereference_protected(
- server->uuid_next,
- lockdep_is_held(&net->fs_lock.lock));
- if (!next)
- break;
- server = next;
- }
- rcu_assign_pointer(server->uuid_next, candidate);
- candidate->uuid_prev = server;
- server = candidate;
- goto added_dup;
- }
+ else
+ goto exists;
}
- server = candidate;
+ server = *candidate;
+ *candidate = NULL;
rb_link_node(&server->uuid_rb, p, pp);
- rb_insert_color(&server->uuid_rb, &net->fs_servers);
+ rb_insert_color(&server->uuid_rb, &cell->fs_servers);
+ write_seqlock(&net->fs_lock);
hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
+ write_sequnlock(&net->fs_lock);
-added_dup:
- write_seqlock(&net->fs_addr_lock);
- estate = rcu_dereference_protected(server->endpoint_state,
- lockdep_is_held(&net->fs_addr_lock.lock));
- alist = estate->addresses;
-
- /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
- * it in the IPv4 and/or IPv6 reverse-map lists.
- *
- * TODO: For speed we want to use something other than a flat list
- * here; even sorting the list in terms of lowest address would help a
- * bit, but anything we might want to do gets messy and memory
- * intensive.
- */
- if (alist->nr_addrs > 0)
- hlist_add_head_rcu(&server->addr_link, &net->fs_addresses);
-
- write_sequnlock(&net->fs_addr_lock);
+ afs_get_cell(cell, afs_cell_trace_get_server);
exists:
- afs_get_server(server, afs_server_trace_get_install);
- write_sequnlock(&net->fs_lock);
+ afs_use_server(server, true, afs_server_trace_use_install);
return server;
}
/*
- * Allocate a new server record and mark it active.
+ * Allocate a new server record and mark it as active but uncreated.
*/
-static struct afs_server *afs_alloc_server(struct afs_cell *cell,
- const uuid_t *uuid,
- struct afs_addr_list *alist)
+static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *uuid)
{
- struct afs_endpoint_state *estate;
struct afs_server *server;
struct afs_net *net = cell->net;
@@ -203,65 +119,50 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
if (!server)
- goto enomem;
-
- estate = kzalloc(sizeof(struct afs_endpoint_state), GFP_KERNEL);
- if (!estate)
- goto enomem_server;
+ return NULL;
refcount_set(&server->ref, 1);
- atomic_set(&server->active, 1);
+ atomic_set(&server->active, 0);
+ __set_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
server->debug_id = atomic_inc_return(&afs_server_debug_id);
- server->addr_version = alist->version;
server->uuid = *uuid;
rwlock_init(&server->fs_lock);
+ INIT_WORK(&server->destroyer, &afs_server_destroyer);
+ timer_setup(&server->timer, afs_server_timer, 0);
INIT_LIST_HEAD(&server->volumes);
init_waitqueue_head(&server->probe_wq);
+ mutex_init(&server->cm_token_lock);
INIT_LIST_HEAD(&server->probe_link);
+ INIT_HLIST_NODE(&server->proc_link);
spin_lock_init(&server->probe_lock);
server->cell = cell;
server->rtt = UINT_MAX;
server->service_id = FS_SERVICE;
-
server->probe_counter = 1;
server->probed_at = jiffies - LONG_MAX / 2;
- refcount_set(&estate->ref, 1);
- estate->addresses = alist;
- estate->server_id = server->debug_id;
- estate->probe_seq = 1;
- rcu_assign_pointer(server->endpoint_state, estate);
afs_inc_servers_outstanding(net);
- trace_afs_server(server->debug_id, 1, 1, afs_server_trace_alloc);
- trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
- afs_estate_trace_alloc_server);
_leave(" = %p", server);
return server;
-
-enomem_server:
- kfree(server);
-enomem:
- _leave(" = NULL [nomem]");
- return NULL;
}
/*
* Look up an address record for a server
*/
-static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
- struct key *key, const uuid_t *uuid)
+static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_server *server,
+ struct key *key)
{
struct afs_vl_cursor vc;
struct afs_addr_list *alist = NULL;
int ret;
ret = -ERESTARTSYS;
- if (afs_begin_vlserver_operation(&vc, cell, key)) {
+ if (afs_begin_vlserver_operation(&vc, server->cell, key)) {
while (afs_select_vlserver(&vc)) {
if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
- alist = afs_yfsvl_get_endpoints(&vc, uuid);
+ alist = afs_yfsvl_get_endpoints(&vc, &server->uuid);
else
- alist = afs_vl_get_addrs_u(&vc, uuid);
+ alist = afs_vl_get_addrs_u(&vc, &server->uuid);
}
ret = afs_end_vlserver_operation(&vc);
@@ -271,72 +172,122 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
}
/*
- * Get or create a fileserver record.
+ * Get or create a fileserver record and return it with an active-use count on
+ * it.
*/
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
const uuid_t *uuid, u32 addr_version)
{
- struct afs_addr_list *alist;
- struct afs_server *server, *candidate;
+ struct afs_addr_list *alist = NULL;
+ struct afs_server *server, *candidate = NULL;
+ bool creating = false;
+ int ret;
_enter("%p,%pU", cell->net, uuid);
- server = afs_find_server_by_uuid(cell->net, uuid);
+ down_read(&cell->fs_lock);
+ server = afs_find_server_by_uuid(cell, uuid);
+ /* Won't see servers marked uncreated. */
+ up_read(&cell->fs_lock);
+
if (server) {
+ timer_delete_sync(&server->timer);
+ if (test_bit(AFS_SERVER_FL_CREATING, &server->flags))
+ goto wait_for_creation;
if (server->addr_version != addr_version)
set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
return server;
}
- alist = afs_vl_lookup_addrs(cell, key, uuid);
- if (IS_ERR(alist))
- return ERR_CAST(alist);
-
- candidate = afs_alloc_server(cell, uuid, alist);
+ candidate = afs_alloc_server(cell, uuid);
if (!candidate) {
afs_put_addrlist(alist, afs_alist_trace_put_server_oom);
return ERR_PTR(-ENOMEM);
}
- server = afs_install_server(cell, candidate);
- if (server != candidate) {
- afs_put_addrlist(alist, afs_alist_trace_put_server_dup);
+ down_write(&cell->fs_lock);
+ server = afs_install_server(cell, &candidate);
+ if (test_bit(AFS_SERVER_FL_CREATING, &server->flags)) {
+ /* We need to wait for creation to complete. */
+ up_write(&cell->fs_lock);
+ goto wait_for_creation;
+ }
+ if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+ set_bit(AFS_SERVER_FL_CREATING, &server->flags);
+ clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+ creating = true;
+ }
+ up_write(&cell->fs_lock);
+ timer_delete_sync(&server->timer);
+
+ /* If we get to create the server, we look up the addresses and then
+ * immediately dispatch an asynchronous probe to each interface on the
+ * fileserver. This will make sure the repeat-probing service is
+ * started.
+ */
+ if (creating) {
+ alist = afs_vl_lookup_addrs(server, key);
+ if (IS_ERR(alist)) {
+ ret = PTR_ERR(alist);
+ goto create_failed;
+ }
+
+ ret = afs_fs_probe_fileserver(cell->net, server, alist, key);
+ if (ret)
+ goto create_failed;
+
+ clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags);
+ }
+
+out:
+ afs_put_addrlist(alist, afs_alist_trace_put_server_create);
+ if (candidate) {
+ kfree(rcu_access_pointer(server->endpoint_state));
kfree(candidate);
- } else {
- /* Immediately dispatch an asynchronous probe to each interface
- * on the fileserver. This will make sure the repeat-probing
- * service is started.
- */
- afs_fs_probe_fileserver(cell->net, server, alist, key);
+ afs_dec_servers_outstanding(cell->net);
+ }
+ return server ?: ERR_PTR(ret);
+
+wait_for_creation:
+ afs_see_server(server, afs_server_trace_wait_create);
+ wait_on_bit(&server->flags, AFS_SERVER_FL_CREATING, TASK_UNINTERRUPTIBLE);
+ if (test_bit_acquire(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+ /* Barrier: read flag before error */
+ ret = READ_ONCE(server->create_error);
+ afs_put_server(cell->net, server, afs_server_trace_unuse_create_fail);
+ server = NULL;
+ goto out;
}
- return server;
-}
+ ret = 0;
+ goto out;
-/*
- * Set the server timer to fire after a given delay, assuming it's not already
- * set for an earlier time.
- */
-static void afs_set_server_timer(struct afs_net *net, time64_t delay)
-{
- if (net->live) {
- afs_inc_servers_outstanding(net);
- if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
- afs_dec_servers_outstanding(net);
+create_failed:
+ down_write(&cell->fs_lock);
+
+ WRITE_ONCE(server->create_error, ret);
+ smp_wmb(); /* Barrier: set error before flag. */
+ set_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+
+ clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags);
+
+ if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+ clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+ creating = true;
}
+ afs_unuse_server(cell->net, server, afs_server_trace_unuse_create_fail);
+ server = NULL;
+
+ up_write(&cell->fs_lock);
+ goto out;
}
/*
- * Server management timer. We have an increment on fs_outstanding that we
- * need to pass along to the work item.
+ * Set/reduce a server's timer.
*/
-void afs_servers_timer(struct timer_list *timer)
+static void afs_set_server_timer(struct afs_server *server, unsigned int delay_secs)
{
- struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
-
- _enter("");
- if (!queue_work(afs_wq, &net->fs_manager))
- afs_dec_servers_outstanding(net);
+ mod_timer(&server->timer, jiffies + delay_secs * HZ);
}
/*
@@ -355,32 +306,20 @@ struct afs_server *afs_get_server(struct afs_server *server,
}
/*
- * Try to get a reference on a server object.
+ * Get an active count on a server object and maybe remove from the inactive
+ * list.
*/
-static struct afs_server *afs_maybe_use_server(struct afs_server *server,
- enum afs_server_trace reason)
-{
- unsigned int a;
- int r;
-
- if (!__refcount_inc_not_zero(&server->ref, &r))
- return NULL;
-
- a = atomic_inc_return(&server->active);
- trace_afs_server(server->debug_id, r + 1, a, reason);
- return server;
-}
-
-/*
- * Get an active count on a server object.
- */
-struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason)
+struct afs_server *afs_use_server(struct afs_server *server, bool activate,
+ enum afs_server_trace reason)
{
unsigned int a;
int r;
__refcount_inc(&server->ref, &r);
a = atomic_inc_return(&server->active);
+ if (a == 1 && activate &&
+ !test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+ timer_delete(&server->timer);
trace_afs_server(server->debug_id, r + 1, a, reason);
return server;
@@ -413,13 +352,16 @@ void afs_put_server(struct afs_net *net, struct afs_server *server,
void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
enum afs_server_trace reason)
{
- if (server) {
- unsigned int active = atomic_dec_return(&server->active);
+ if (!server)
+ return;
- if (active == 0)
- afs_set_server_timer(net, afs_server_gc_delay);
- afs_put_server(net, server, reason);
+ if (atomic_dec_and_test(&server->active)) {
+ if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) ||
+ READ_ONCE(server->cell->state) >= AFS_CELL_REMOVING)
+ schedule_work(&server->destroyer);
}
+
+ afs_put_server(net, server, reason);
}
/*
@@ -428,10 +370,22 @@ void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
void afs_unuse_server(struct afs_net *net, struct afs_server *server,
enum afs_server_trace reason)
{
- if (server) {
- server->unuse_time = ktime_get_real_seconds();
- afs_unuse_server_notime(net, server, reason);
+ if (!server)
+ return;
+
+ if (atomic_dec_and_test(&server->active)) {
+ if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) &&
+ READ_ONCE(server->cell->state) < AFS_CELL_REMOVING) {
+ time64_t unuse_time = ktime_get_real_seconds();
+
+ server->unuse_time = unuse_time;
+ afs_set_server_timer(server, afs_server_gc_delay);
+ } else {
+ schedule_work(&server->destroyer);
+ }
}
+
+ afs_put_server(net, server, reason);
}
static void afs_server_rcu(struct rcu_head *rcu)
@@ -442,6 +396,8 @@ static void afs_server_rcu(struct rcu_head *rcu)
atomic_read(&server->active), afs_server_trace_free);
afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state),
afs_estate_trace_put_server);
+ afs_put_cell(server->cell, afs_cell_trace_put_server);
+ kfree(server->cm_rxgk_appdata.data);
kfree(server);
}
@@ -460,159 +416,119 @@ static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server
}
/*
- * destroy a dead server
+ * Check to see if the server record has expired.
*/
-static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
+static bool afs_has_server_expired(const struct afs_server *server)
{
- if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
- afs_give_up_callbacks(net, server);
+ time64_t expires_at;
- afs_put_server(net, server, afs_server_trace_destroy);
+ if (atomic_read(&server->active))
+ return false;
+
+ if (server->cell->net->live ||
+ server->cell->state >= AFS_CELL_REMOVING) {
+ trace_afs_server(server->debug_id, refcount_read(&server->ref),
+ 0, afs_server_trace_purging);
+ return true;
+ }
+
+ expires_at = server->unuse_time;
+ if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
+ !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
+ expires_at += afs_server_gc_delay;
+
+ return ktime_get_real_seconds() > expires_at;
}
/*
- * Garbage collect any expired servers.
+ * Remove a server record from it's parent cell's database.
*/
-static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
+static bool afs_remove_server_from_cell(struct afs_server *server)
{
- struct afs_server *server, *next, *prev;
- int active;
-
- while ((server = gc_list)) {
- gc_list = server->gc_next;
-
- write_seqlock(&net->fs_lock);
-
- active = atomic_read(&server->active);
- if (active == 0) {
- trace_afs_server(server->debug_id, refcount_read(&server->ref),
- active, afs_server_trace_gc);
- next = rcu_dereference_protected(
- server->uuid_next, lockdep_is_held(&net->fs_lock.lock));
- prev = server->uuid_prev;
- if (!prev) {
- /* The one at the front is in the tree */
- if (!next) {
- rb_erase(&server->uuid_rb, &net->fs_servers);
- } else {
- rb_replace_node_rcu(&server->uuid_rb,
- &next->uuid_rb,
- &net->fs_servers);
- next->uuid_prev = NULL;
- }
- } else {
- /* This server is not at the front */
- rcu_assign_pointer(prev->uuid_next, next);
- if (next)
- next->uuid_prev = prev;
- }
-
- list_del(&server->probe_link);
- hlist_del_rcu(&server->proc_link);
- if (!hlist_unhashed(&server->addr_link))
- hlist_del_rcu(&server->addr_link);
- }
- write_sequnlock(&net->fs_lock);
+ struct afs_cell *cell = server->cell;
+
+ down_write(&cell->fs_lock);
- if (active == 0)
- afs_destroy_server(net, server);
+ if (!afs_has_server_expired(server)) {
+ up_write(&cell->fs_lock);
+ return false;
}
+
+ set_bit(AFS_SERVER_FL_EXPIRED, &server->flags);
+ _debug("expire %pU %u", &server->uuid, atomic_read(&server->active));
+ afs_see_server(server, afs_server_trace_see_expired);
+ rb_erase(&server->uuid_rb, &cell->fs_servers);
+ up_write(&cell->fs_lock);
+ return true;
}
-/*
- * Manage the records of servers known to be within a network namespace. This
- * includes garbage collecting unused servers.
- *
- * Note also that we were given an increment on net->servers_outstanding by
- * whoever queued us that we need to deal with before returning.
- */
-void afs_manage_servers(struct work_struct *work)
+static void afs_server_destroyer(struct work_struct *work)
{
- struct afs_net *net = container_of(work, struct afs_net, fs_manager);
- struct afs_server *gc_list = NULL;
- struct rb_node *cursor;
- time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
- bool purging = !net->live;
-
- _enter("");
+ struct afs_endpoint_state *estate;
+ struct afs_server *server = container_of(work, struct afs_server, destroyer);
+ struct afs_net *net = server->cell->net;
- /* Trawl the server list looking for servers that have expired from
- * lack of use.
- */
- read_seqlock_excl(&net->fs_lock);
+ afs_see_server(server, afs_server_trace_see_destroyer);
- for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
- struct afs_server *server =
- rb_entry(cursor, struct afs_server, uuid_rb);
- int active = atomic_read(&server->active);
+ if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+ return;
- _debug("manage %pU %u", &server->uuid, active);
+ if (!afs_remove_server_from_cell(server))
+ return;
- if (purging) {
- trace_afs_server(server->debug_id, refcount_read(&server->ref),
- active, afs_server_trace_purging);
- if (active != 0)
- pr_notice("Can't purge s=%08x\n", server->debug_id);
- }
+ timer_shutdown_sync(&server->timer);
+ cancel_work(&server->destroyer);
- if (active == 0) {
- time64_t expire_at = server->unuse_time;
-
- if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
- !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
- expire_at += afs_server_gc_delay;
- if (purging || expire_at <= now) {
- server->gc_next = gc_list;
- gc_list = server;
- } else if (expire_at < next_manage) {
- next_manage = expire_at;
- }
- }
- }
+ if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+ afs_give_up_callbacks(net, server);
- read_sequnlock_excl(&net->fs_lock);
+ /* Unbind the rxrpc_peer records from the server. */
+ estate = rcu_access_pointer(server->endpoint_state);
+ if (estate)
+ afs_set_peer_appdata(server, estate->addresses, NULL);
- /* Update the timer on the way out. We have to pass an increment on
- * servers_outstanding in the namespace that we are in to the timer or
- * the work scheduler.
- */
- if (!purging && next_manage < TIME64_MAX) {
- now = ktime_get_real_seconds();
+ write_seqlock(&net->fs_lock);
+ list_del_init(&server->probe_link);
+ if (!hlist_unhashed(&server->proc_link))
+ hlist_del_rcu(&server->proc_link);
+ write_sequnlock(&net->fs_lock);
- if (next_manage - now <= 0) {
- if (queue_work(afs_wq, &net->fs_manager))
- afs_inc_servers_outstanding(net);
- } else {
- afs_set_server_timer(net, next_manage - now);
- }
- }
+ afs_put_server(net, server, afs_server_trace_destroy);
+}
- afs_gc_servers(net, gc_list);
+static void afs_server_timer(struct timer_list *timer)
+{
+ struct afs_server *server = container_of(timer, struct afs_server, timer);
- afs_dec_servers_outstanding(net);
- _leave(" [%d]", atomic_read(&net->servers_outstanding));
+ afs_see_server(server, afs_server_trace_see_timer);
+ if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+ schedule_work(&server->destroyer);
}
-static void afs_queue_server_manager(struct afs_net *net)
+/*
+ * Wake up all the servers in a cell so that they can purge themselves.
+ */
+void afs_purge_servers(struct afs_cell *cell)
{
- afs_inc_servers_outstanding(net);
- if (!queue_work(afs_wq, &net->fs_manager))
- afs_dec_servers_outstanding(net);
+ struct afs_server *server;
+ struct rb_node *rb;
+
+ down_read(&cell->fs_lock);
+ for (rb = rb_first(&cell->fs_servers); rb; rb = rb_next(rb)) {
+ server = rb_entry(rb, struct afs_server, uuid_rb);
+ afs_see_server(server, afs_server_trace_see_purge);
+ schedule_work(&server->destroyer);
+ }
+ up_read(&cell->fs_lock);
}
/*
- * Purge list of servers.
+ * Wait for outstanding servers.
*/
-void afs_purge_servers(struct afs_net *net)
+void afs_wait_for_servers(struct afs_net *net)
{
_enter("");
- if (del_timer_sync(&net->fs_timer))
- afs_dec_servers_outstanding(net);
-
- afs_queue_server_manager(net);
-
- _debug("wait");
atomic_dec(&net->servers_outstanding);
wait_var_event(&net->servers_outstanding,
!atomic_read(&net->servers_outstanding));
@@ -636,7 +552,7 @@ static noinline bool afs_update_server_record(struct afs_operation *op,
atomic_read(&server->active),
afs_server_trace_update);
- alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid);
+ alist = afs_vl_lookup_addrs(server, op->key);
if (IS_ERR(alist)) {
rcu_read_lock();
estate = rcu_dereference(server->endpoint_state);
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 7e7e567a7f8a..20d5474837df 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -16,7 +16,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
if (slist && refcount_dec_and_test(&slist->usage)) {
for (i = 0; i < slist->nr_servers; i++)
afs_unuse_server(net, slist->servers[i].server,
- afs_server_trace_put_slist);
+ afs_server_trace_unuse_slist);
kfree_rcu(slist, rcu);
}
}
@@ -97,8 +97,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
- afs_put_server(volume->cell->net, server,
- afs_server_trace_put_slist_isort);
+ afs_unuse_server_notime(volume->cell->net, server,
+ afs_server_trace_unuse_slist_isort);
continue;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f3ba1c3e72f5..25b306db6992 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -194,8 +194,6 @@ static int afs_show_options(struct seq_file *m, struct dentry *root)
if (as->dyn_root)
seq_puts(m, ",dyn");
- if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
- seq_puts(m, ",autocell");
switch (as->flock_mode) {
case afs_flock_mode_unset: break;
case afs_flock_mode_local: p = "local"; break;
@@ -292,13 +290,14 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
/* lookup the cell record */
if (cellname) {
cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
- NULL, false);
+ NULL, false,
+ afs_cell_trace_use_lookup_mount);
if (IS_ERR(cell)) {
pr_err("kAFS: unable to lookup cell '%*.*s'\n",
cellnamesz, cellnamesz, cellname ?: "");
return PTR_ERR(cell);
}
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_parse);
afs_see_cell(cell, afs_cell_trace_see_source);
ctx->cell = cell;
}
@@ -395,7 +394,7 @@ static int afs_validate_fc(struct fs_context *fc)
ctx->key = NULL;
cell = afs_use_cell(ctx->cell->alias_of,
afs_cell_trace_use_fc_alias);
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
ctx->cell = cell;
goto reget_key;
}
@@ -468,7 +467,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
/* allocate the root inode and dentry */
if (as->dyn_root) {
- inode = afs_iget_pseudo_dir(sb, true);
+ inode = afs_dynroot_iget_root(sb);
} else {
sprintf(sb->s_id, "%llu", as->volume->vid);
afs_activate_volume(as->volume);
@@ -478,9 +477,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
if (IS_ERR(inode))
return PTR_ERR(inode);
- if (ctx->autocell || as->dyn_root)
- set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
-
ret = -ENOMEM;
sb->s_root = d_make_root(inode);
if (!sb->s_root)
@@ -488,9 +484,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
if (as->dyn_root) {
sb->s_d_op = &afs_dynroot_dentry_operations;
- ret = afs_dynroot_populate(sb);
- if (ret < 0)
- goto error;
} else {
sb->s_d_op = &afs_fs_dentry_operations;
rcu_assign_pointer(as->volume->sb, sb);
@@ -527,9 +520,8 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
static void afs_destroy_sbi(struct afs_super_info *as)
{
if (as) {
- struct afs_net *net = afs_net(as->net_ns);
afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi);
- afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi);
+ afs_unuse_cell(as->cell, afs_cell_trace_unuse_sbi);
put_net(as->net_ns);
kfree(as);
}
@@ -539,9 +531,6 @@ static void afs_kill_super(struct super_block *sb)
{
struct afs_super_info *as = AFS_FS_S(sb);
- if (as->dyn_root)
- afs_dynroot_depopulate(sb);
-
/* Clear the callback interests (which will do ilookup5) before
* deactivating the superblock.
*/
@@ -615,7 +604,7 @@ static void afs_free_fc(struct fs_context *fc)
afs_destroy_sbi(fc->s_fs_info);
afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc);
- afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
+ afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
key_put(ctx->key);
kfree(ctx);
}
@@ -663,7 +652,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->netfs.inode);
- mutex_init(&vnode->io_lock);
+ INIT_LIST_HEAD(&vnode->io_lock_waiters);
init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
@@ -696,6 +685,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
vnode->volume = NULL;
vnode->lock_key = NULL;
vnode->permit_cache = NULL;
+ vnode->directory = NULL;
+ vnode->directory_size = 0;
vnode->flags = 1 << AFS_VNODE_UNSET;
vnode->lock_state = AFS_VNODE_LOCK_NONE;
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
index 32a53fc8dfb2..0ba8336c9025 100644
--- a/fs/afs/validation.c
+++ b/fs/afs/validation.c
@@ -120,22 +120,31 @@
bool afs_check_validity(const struct afs_vnode *vnode)
{
const struct afs_volume *volume = vnode->volume;
+ enum afs_vnode_invalid_trace trace = afs_vnode_valid_trace;
+ time64_t cb_expires_at = atomic64_read(&vnode->cb_expires_at);
time64_t deadline = ktime_get_real_seconds() + 10;
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
return true;
- if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
- atomic64_read(&vnode->cb_expires_at) <= deadline ||
- volume->cb_expires_at <= deadline ||
- vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot) ||
- vnode->cb_scrub != atomic_read(&volume->cb_scrub) ||
- test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- _debug("inval");
- return false;
- }
-
- return true;
+ if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break))
+ trace = afs_vnode_invalid_trace_cb_v_break;
+ else if (cb_expires_at == AFS_NO_CB_PROMISE)
+ trace = afs_vnode_invalid_trace_no_cb_promise;
+ else if (cb_expires_at <= deadline)
+ trace = afs_vnode_invalid_trace_expired;
+ else if (volume->cb_expires_at <= deadline)
+ trace = afs_vnode_invalid_trace_vol_expired;
+ else if (vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot))
+ trace = afs_vnode_invalid_trace_cb_ro_snapshot;
+ else if (vnode->cb_scrub != atomic_read(&volume->cb_scrub))
+ trace = afs_vnode_invalid_trace_cb_scrub;
+ else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ trace = afs_vnode_invalid_trace_zap_data;
+ else
+ return true;
+ trace_afs_vnode_invalid(vnode, trace);
+ return false;
}
/*
@@ -365,9 +374,9 @@ static void afs_zap_data(struct afs_vnode *vnode)
* written back in a regular file and completely discard the pages in a
* directory or symlink */
if (S_ISREG(vnode->netfs.inode.i_mode))
- invalidate_remote_inode(&vnode->netfs.inode);
+ filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX);
else
- invalidate_inode_pages2(vnode->netfs.inode.i_mapping);
+ filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX);
}
/*
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index 9f36e14f1c2d..709b4cdb723e 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -205,11 +205,11 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
goto is_alias;
if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
- afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+ afs_unuse_cell(p, afs_cell_trace_unuse_check_alias);
return -ERESTARTSYS;
}
- afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+ afs_unuse_cell(p, afs_cell_trace_unuse_check_alias);
}
mutex_unlock(&cell->net->proc_cells_lock);
@@ -253,6 +253,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
{
struct afs_cell *master;
+ size_t name_len;
char *cell_name;
cell_name = afs_vl_get_cell_name(cell, key);
@@ -264,8 +265,12 @@ static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
return 0;
}
- master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name),
- NULL, false);
+ name_len = strlen(cell_name);
+ if (!name_len || name_len > AFS_MAXCELLNAME)
+ master = ERR_PTR(-EOPNOTSUPP);
+ else
+ master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, false,
+ afs_cell_trace_use_lookup_canonical);
kfree(cell_name);
if (IS_ERR(master))
return PTR_ERR(master);
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index d8f79f6ada3d..6ad9688d8f4b 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -48,7 +48,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
cell->dns_expiry <= ktime_get_real_seconds()) {
dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
- afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
+ afs_queue_cell(cell, afs_cell_trace_queue_dns);
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
if (wait_var_event_interruptible(
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index cac75f89b64a..3a23c0b08eb6 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -370,6 +370,7 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
.name = "VL.GetCapabilities",
.op = afs_VL_GetCapabilities,
.deliver = afs_deliver_vl_get_capabilities,
+ .immediate_cancel = afs_vlserver_probe_result,
.done = afs_vlserver_probe_result,
.destructor = afs_destroy_vl_get_capabilities,
};
@@ -697,7 +698,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
return ret;
namesz = ntohl(call->tmp);
- if (namesz > AFS_MAXCELLNAME)
+ if (namesz > YFS_VL_MAXCELLNAME)
return afs_protocol_error(call, afs_eproto_cellname_len);
paddedsz = (namesz + 3) & ~3;
call->count = namesz;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index af3a3f57c1b3..0efff3d25133 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -10,6 +10,7 @@
#include "internal.h"
static unsigned __read_mostly afs_volume_record_life = 60 * 60;
+static atomic_t afs_volume_debug_id;
static void afs_destroy_volume(struct work_struct *work);
@@ -59,7 +60,7 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
struct afs_cell *cell = volume->cell;
if (!hlist_unhashed(&volume->proc_link)) {
- trace_afs_volume(volume->vid, refcount_read(&cell->ref),
+ trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref),
afs_volume_trace_remove);
write_seqlock(&cell->volume_lock);
hlist_del_rcu(&volume->proc_link);
@@ -84,6 +85,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
if (!volume)
goto error_0;
+ volume->debug_id = atomic_inc_return(&afs_volume_debug_id);
volume->vid = vldb->vid[params->type];
volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol);
@@ -115,7 +117,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
*_slist = slist;
rcu_assign_pointer(volume->servers, slist);
- trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
+ trace_afs_volume(volume->debug_id, volume->vid, 1, afs_volume_trace_alloc);
return volume;
error_1:
@@ -247,7 +249,7 @@ static void afs_destroy_volume(struct work_struct *work)
afs_remove_volume_from_cell(volume);
afs_put_serverlist(volume->cell->net, slist);
afs_put_cell(volume->cell, afs_cell_trace_put_vol);
- trace_afs_volume(volume->vid, refcount_read(&volume->ref),
+ trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref),
afs_volume_trace_free);
kfree_rcu(volume, rcu);
@@ -262,7 +264,7 @@ bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
int r;
if (__refcount_inc_not_zero(&volume->ref, &r)) {
- trace_afs_volume(volume->vid, r + 1, reason);
+ trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason);
return true;
}
return false;
@@ -278,7 +280,7 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume,
int r;
__refcount_inc(&volume->ref, &r);
- trace_afs_volume(volume->vid, r + 1, reason);
+ trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason);
}
return volume;
}
@@ -290,12 +292,13 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume,
void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason)
{
if (volume) {
+ unsigned int debug_id = volume->debug_id;
afs_volid_t vid = volume->vid;
bool zero;
int r;
zero = __refcount_dec_and_test(&volume->ref, &r);
- trace_afs_volume(vid, r - 1, reason);
+ trace_afs_volume(debug_id, vid, r - 1, reason);
if (zero)
schedule_work(&volume->destructor);
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 74402d95a884..18b0a9f1615e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -29,43 +29,39 @@ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsign
/*
* Find a key to use for the writeback. We cached the keys used to author the
- * writes on the vnode. *_wbk will contain the last writeback key used or NULL
- * and we need to start from there if it's set.
+ * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key
+ * record used or NULL and we need to start from there if it's set.
+ * wreq->netfs_priv will be set to the key itself or NULL.
*/
-static int afs_get_writeback_key(struct afs_vnode *vnode,
- struct afs_wb_key **_wbk)
+static void afs_get_writeback_key(struct netfs_io_request *wreq)
{
- struct afs_wb_key *wbk = NULL;
- struct list_head *p;
- int ret = -ENOKEY, ret2;
+ struct afs_wb_key *wbk, *old = wreq->netfs_priv2;
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+
+ key_put(wreq->netfs_priv);
+ wreq->netfs_priv = NULL;
+ wreq->netfs_priv2 = NULL;
spin_lock(&vnode->wb_lock);
- if (*_wbk)
- p = (*_wbk)->vnode_link.next;
+ if (old)
+ wbk = list_next_entry(old, vnode_link);
else
- p = vnode->wb_keys.next;
+ wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link);
- while (p != &vnode->wb_keys) {
- wbk = list_entry(p, struct afs_wb_key, vnode_link);
+ list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) {
_debug("wbk %u", key_serial(wbk->key));
- ret2 = key_validate(wbk->key);
- if (ret2 == 0) {
+ if (key_validate(wbk->key) == 0) {
refcount_inc(&wbk->usage);
+ wreq->netfs_priv = key_get(wbk->key);
+ wreq->netfs_priv2 = wbk;
_debug("USE WB KEY %u", key_serial(wbk->key));
break;
}
-
- wbk = NULL;
- if (ret == -ENOKEY)
- ret = ret2;
- p = p->next;
}
spin_unlock(&vnode->wb_lock);
- if (*_wbk)
- afs_put_wb_key(*_wbk);
- *_wbk = wbk;
- return 0;
+
+ afs_put_wb_key(old);
}
static void afs_store_data_success(struct afs_operation *op)
@@ -75,8 +71,7 @@ static void afs_store_data_success(struct afs_operation *op)
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
if (!afs_op_error(op)) {
- if (!op->store.laundering)
- afs_pages_written_back(vnode, op->store.pos, op->store.size);
+ afs_pages_written_back(vnode, op->store.pos, op->store.size);
afs_stat_v(vnode, n_stores);
atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
}
@@ -89,113 +84,142 @@ static const struct afs_operation_ops afs_store_data_operation = {
};
/*
- * write to a file
+ * Prepare a subrequest to write to the server. This sets the max_len
+ * parameter.
+ */
+void afs_prepare_write(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_stream *stream = &subreq->rreq->io_streams[subreq->stream_nr];
+
+ //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags))
+ // subreq->max_len = 512 * 1024;
+ //else
+ stream->sreq_max_len = 256 * 1024 * 1024;
+}
+
+/*
+ * Issue a subrequest to write to the server.
*/
-static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
- bool laundering)
+static void afs_issue_write_worker(struct work_struct *work)
{
+ struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work);
+ struct netfs_io_request *wreq = subreq->rreq;
struct afs_operation *op;
- struct afs_wb_key *wbk = NULL;
- loff_t size = iov_iter_count(iter);
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+ unsigned long long pos = subreq->start + subreq->transferred;
+ size_t len = subreq->len - subreq->transferred;
int ret = -ENOKEY;
- _enter("%s{%llx:%llu.%u},%llx,%llx",
+ _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx",
+ wreq->debug_id, subreq->debug_index,
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- size, pos);
+ pos, len);
- ret = afs_get_writeback_key(vnode, &wbk);
- if (ret) {
- _leave(" = %d [no keys]", ret);
- return ret;
- }
+#if 0 // Error injection
+ if (subreq->debug_index == 3)
+ return netfs_write_subrequest_terminated(subreq, -ENOANO, false);
- op = afs_alloc_operation(wbk->key, vnode->volume);
- if (IS_ERR(op)) {
- afs_put_wb_key(wbk);
- return -ENOMEM;
+ if (!subreq->retry_count) {
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
}
+#endif
+
+ op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
+ if (IS_ERR(op))
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
afs_op_set_vnode(op, 0, vnode);
- op->file[0].dv_delta = 1;
+ op->file[0].dv_delta = 1;
op->file[0].modification = true;
- op->store.pos = pos;
- op->store.size = size;
- op->store.laundering = laundering;
- op->flags |= AFS_OPERATION_UNINTR;
- op->ops = &afs_store_data_operation;
+ op->store.pos = pos;
+ op->store.size = len;
+ op->flags |= AFS_OPERATION_UNINTR;
+ op->ops = &afs_store_data_operation;
-try_next_key:
afs_begin_vnode_operation(op);
- op->store.write_iter = iter;
- op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
- op->mtime = inode_get_mtime(&vnode->netfs.inode);
+ op->store.write_iter = &subreq->io_iter;
+ op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size);
+ op->mtime = inode_get_mtime(&vnode->netfs.inode);
afs_wait_for_operation(op);
-
- switch (afs_op_error(op)) {
+ ret = afs_put_operation(op);
+ switch (ret) {
+ case 0:
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ break;
case -EACCES:
case -EPERM:
case -ENOKEY:
case -EKEYEXPIRED:
case -EKEYREJECTED:
case -EKEYREVOKED:
- _debug("next");
-
- ret = afs_get_writeback_key(vnode, &wbk);
- if (ret == 0) {
- key_put(op->key);
- op->key = key_get(wbk->key);
- goto try_next_key;
- }
+ /* If there are more keys we can try, use the retry algorithm
+ * to rotate the keys.
+ */
+ if (wreq->netfs_priv2)
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
break;
}
- afs_put_wb_key(wbk);
- _leave(" = %d", afs_op_error(op));
- return afs_put_operation(op);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false);
}
-static void afs_upload_to_server(struct netfs_io_subrequest *subreq)
+void afs_issue_write(struct netfs_io_subrequest *subreq)
{
- struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
- ssize_t ret;
-
- _enter("%x[%x],%zx",
- subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count);
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- ret = afs_store_data(vnode, &subreq->io_iter, subreq->start,
- subreq->rreq->origin == NETFS_LAUNDER_WRITE);
- netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len,
- false);
+ subreq->work.func = afs_issue_write_worker;
+ if (!queue_work(system_unbound_wq, &subreq->work))
+ WARN_ON_ONCE(1);
}
-static void afs_upload_to_server_worker(struct work_struct *work)
+/*
+ * Writeback calls this when it finds a folio that needs uploading. This isn't
+ * called if writeback only has copy-to-cache to deal with.
+ */
+void afs_begin_writeback(struct netfs_io_request *wreq)
{
- struct netfs_io_subrequest *subreq =
- container_of(work, struct netfs_io_subrequest, work);
-
- afs_upload_to_server(subreq);
+ if (S_ISREG(wreq->inode->i_mode))
+ afs_get_writeback_key(wreq);
}
/*
- * Set up write requests for a writeback slice. We need to add a write request
- * for each write we want to make.
+ * Prepare to retry the writes in request. Use this to try rotating the
+ * available writeback keys.
*/
-void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream)
{
- struct netfs_io_subrequest *subreq;
-
- _enter("%x,%llx-%llx", wreq->debug_id, start, start + len);
+ struct netfs_io_subrequest *subreq =
+ list_first_entry(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+
+ switch (wreq->origin) {
+ case NETFS_READAHEAD:
+ case NETFS_READPAGE:
+ case NETFS_READ_GAPS:
+ case NETFS_READ_SINGLE:
+ case NETFS_READ_FOR_WRITE:
+ case NETFS_DIO_READ:
+ return;
+ default:
+ break;
+ }
- subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
- start, len, afs_upload_to_server_worker);
- if (subreq)
- netfs_queue_write_request(subreq);
+ switch (subreq->error) {
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ afs_get_writeback_key(wreq);
+ if (!wreq->netfs_priv)
+ stream->failed = true;
+ break;
+ }
}
/*
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
index 8ca868164507..cc5f143d21a3 100644
--- a/fs/afs/xdr_fs.h
+++ b/fs/afs/xdr_fs.h
@@ -88,7 +88,7 @@ union afs_xdr_dir_block {
struct {
struct afs_xdr_dir_hdr hdr;
- u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS];
+ u8 alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR];
__be16 hashtable[AFS_DIR_HASHTBL_SIZE];
} meta;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index f521e66d3bf6..257af259c04a 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -352,18 +352,19 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call)
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
{
struct afs_operation *op = call->op;
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
const __be32 *bp;
+ size_t count_before;
int ret;
_enter("{%u,%zu, %zu/%llu}",
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
- req->actual_len);
+ call->remaining);
switch (call->unmarshall) {
case 0:
- req->actual_len = 0;
+ call->remaining = 0;
afs_extract_to_tmp64(call);
call->unmarshall++;
fallthrough;
@@ -378,38 +379,39 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
if (ret < 0)
return ret;
- req->actual_len = be64_to_cpu(call->tmp64);
- _debug("DATA length: %llu", req->actual_len);
+ call->remaining = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", call->remaining);
- if (req->actual_len == 0)
+ if (call->remaining == 0)
goto no_more_data;
- call->iter = req->iter;
- call->iov_len = min(req->actual_len, req->len);
+ call->iter = &subreq->io_iter;
+ call->iov_len = min(call->remaining, subreq->len - subreq->transferred);
call->unmarshall++;
fallthrough;
/* extract the returned data */
case 2:
- _debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->actual_len);
+ count_before = call->iov_len;
+ _debug("extract data %zu/%llu", count_before, call->remaining);
ret = afs_extract_data(call, true);
+ subreq->transferred += count_before - call->iov_len;
if (ret < 0)
return ret;
call->iter = &call->def_iter;
- if (req->actual_len <= req->len)
+ if (call->remaining)
goto no_more_data;
/* Discard any excess data the server gave us */
- afs_extract_discard(call, req->actual_len - req->len);
+ afs_extract_discard(call, call->remaining);
call->unmarshall = 3;
fallthrough;
case 3:
_debug("extract discard %zu/%llu",
- iov_iter_count(call->iter), req->actual_len - req->len);
+ iov_iter_count(call->iter), call->remaining);
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -434,8 +436,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
xdr_decode_YFSCallBack(&bp, call, &vp->scb);
xdr_decode_YFSVolSync(&bp, &op->volsync);
- req->data_version = vp->scb.status.data_version;
- req->file_size = vp->scb.status.size;
+ if (subreq->start + subreq->transferred >= vp->scb.status.size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
call->unmarshall++;
fallthrough;
@@ -454,7 +456,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
static const struct afs_call_type yfs_RXYFSFetchData64 = {
.name = "YFS.FetchData64",
.op = yfs_FS_FetchData64,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = yfs_deliver_fs_fetch_data64,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
@@ -463,14 +467,15 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = {
*/
void yfs_fs_fetch_data(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
struct afs_call *call;
__be32 *bp;
- _enter(",%x,{%llx:%llu},%llx,%llx",
+ _enter(",%x,{%llx:%llu},%llx,%zx",
key_serial(op->key), vp->fid.vid, vp->fid.vnode,
- req->pos, req->len);
+ subreq->start + subreq->transferred,
+ subreq->len - subreq->transferred);
call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64,
sizeof(__be32) * 2 +
@@ -482,15 +487,16 @@ void yfs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- req->call_debug_id = call->debug_id;
+ if (op->flags & AFS_OPERATION_ASYNC)
+ call->async = true;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSFETCHDATA64);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &vp->fid);
- bp = xdr_encode_u64(bp, req->pos);
- bp = xdr_encode_u64(bp, req->len);
+ bp = xdr_encode_u64(bp, subreq->start + subreq->transferred);
+ bp = xdr_encode_u64(bp, subreq->len - subreq->transferred);
yfs_check_req(call, bp);
call->fid = vp->fid;
@@ -661,8 +667,9 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call)
static void yfs_done_fs_remove_file2(struct afs_call *call)
{
if (call->error == -ECONNABORTED &&
- call->abort_code == RX_INVALID_OPERATION) {
- set_bit(AFS_SERVER_FL_NO_RM2, &call->server->flags);
+ (call->abort_code == RX_INVALID_OPERATION ||
+ call->abort_code == RXGEN_OPCODE)) {
+ set_bit(AFS_SERVER_FL_NO_RM2, &call->op->server->flags);
call->op->flags |= AFS_OPERATION_DOWNGRADE;
}
}