diff options
Diffstat (limited to 'fs/afs')
38 files changed, 3173 insertions, 2003 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index fc8ba9142f2f..682bd8ec2c10 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -5,6 +5,7 @@ config AFS_FS select AF_RXRPC select DNS_RESOLVER select NETFS_SUPPORT + select CRYPTO_KRB5 help If you say Y here, you will get an experimental Andrew File System driver. It currently only supports unsecured read-only AFS access. diff --git a/fs/afs/Makefile b/fs/afs/Makefile index dcdc0f1bb76f..b49b8fe682f3 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -8,9 +8,11 @@ kafs-y := \ addr_prefs.o \ callback.o \ cell.o \ + cm_security.o \ cmservice.o \ dir.o \ dir_edit.o \ + dir_search.o \ dir_silly.o \ dynroot.o \ file.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 6d42f85c6be5..e941da5b6dd9 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -362,3 +362,53 @@ int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist, alist->nr_addrs++; return 0; } + +/* + * Set the app data on the rxrpc peers an address list points to + */ +void afs_set_peer_appdata(struct afs_server *server, + struct afs_addr_list *old_alist, + struct afs_addr_list *new_alist) +{ + unsigned long data = (unsigned long)server; + int n = 0, o = 0; + + if (!old_alist) { + /* New server. Just set all. */ + for (; n < new_alist->nr_addrs; n++) + rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data); + return; + } + if (!new_alist) { + /* Dead server. Just remove all. */ + for (; o < old_alist->nr_addrs; o++) + rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0); + return; + } + + /* Walk through the two lists simultaneously, setting new peers and + * clearing old ones. The two lists are ordered by pointer to peer + * record. + */ + while (n < new_alist->nr_addrs && o < old_alist->nr_addrs) { + struct rxrpc_peer *pn = new_alist->addrs[n].peer; + struct rxrpc_peer *po = old_alist->addrs[o].peer; + + if (pn == po) + continue; + if (pn < po) { + rxrpc_kernel_set_peer_data(pn, data); + n++; + } else { + rxrpc_kernel_set_peer_data(po, 0); + o++; + } + } + + if (n < new_alist->nr_addrs) + for (; n < new_alist->nr_addrs; n++) + rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data); + if (o < old_alist->nr_addrs) + for (; o < old_alist->nr_addrs; o++) + rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0); +} diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c index a189ff8a5034..c0384201b8fe 100644 --- a/fs/afs/addr_prefs.c +++ b/fs/afs/addr_prefs.c @@ -413,8 +413,10 @@ int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size) do { argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv)); - if (argc < 0) - return argc; + if (argc < 0) { + ret = argc; + goto done; + } if (argc < 2) goto inval; diff --git a/fs/afs/afs.h b/fs/afs/afs.h index b488072aee87..ec3db00bd081 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -10,7 +10,7 @@ #include <linux/in.h> -#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */ +#define AFS_MAXCELLNAME 253 /* Maximum length of a cell name (DNS limited) */ #define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ #define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */ #define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 9c65ffb8a523..b835e25a2c02 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -13,6 +13,7 @@ #define AFS_VL_PORT 7003 /* volume location service port */ #define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ #define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */ +#define YFS_VL_MAXCELLNAME 256 /* Maximum length of a cell name in YFS protocol */ enum AFSVL_Operations { VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */ @@ -134,13 +135,4 @@ struct afs_uvldbentry__xdr { __be32 spares9; }; -struct afs_address_list { - refcount_t usage; - unsigned int version; - unsigned int nr_addrs; - struct sockaddr_rxrpc addrs[]; -}; - -extern void afs_put_address_list(struct afs_address_list *alist); - #endif /* AFS_VL_H */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 99b2c8172021..69e1dd55b160 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -41,7 +41,7 @@ static void afs_volume_init_callback(struct afs_volume *volume) list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) { if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) { - atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); + afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb); queue_work(system_unbound_wq, &vnode->cb_work); } } @@ -79,7 +79,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas _enter(""); clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); - if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) { + if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) { vnode->cb_break++; vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break); afs_clear_permits(vnode); diff --git a/fs/afs/cell.c b/fs/afs/cell.c index caa09875f520..0168bbf53fe0 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -20,8 +20,9 @@ static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; static atomic_t cell_debug_id; -static void afs_queue_cell_manager(struct afs_net *); -static void afs_manage_cell_work(struct work_struct *); +static void afs_cell_timer(struct timer_list *timer); +static void afs_destroy_cell_work(struct work_struct *work); +static void afs_manage_cell_work(struct work_struct *work); static void afs_dec_cells_outstanding(struct afs_net *net) { @@ -29,19 +30,11 @@ static void afs_dec_cells_outstanding(struct afs_net *net) wake_up_var(&net->cells_outstanding); } -/* - * Set the cell timer to fire after a given delay, assuming it's not already - * set for an earlier time. - */ -static void afs_set_cell_timer(struct afs_net *net, time64_t delay) +static void afs_set_cell_state(struct afs_cell *cell, enum afs_cell_state state) { - if (net->live) { - atomic_inc(&net->cells_outstanding); - if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) - afs_dec_cells_outstanding(net); - } else { - afs_queue_cell_manager(net); - } + smp_store_release(&cell->state, state); /* Commit cell changes before state */ + smp_wmb(); /* Set cell state before task state */ + wake_up_var(&cell->state); } /* @@ -64,7 +57,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, return ERR_PTR(-ENAMETOOLONG); if (!name) { - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, + lockdep_is_held(&net->cells_lock)); if (!cell) return ERR_PTR(-EDESTADDRREQ); goto found; @@ -115,7 +109,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, const char *addresses) { - struct afs_vlserver_list *vllist; + struct afs_vlserver_list *vllist = NULL; struct afs_cell *cell; int i, ret; @@ -146,27 +140,31 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, return ERR_PTR(-ENOMEM); } - cell->name = kmalloc(namelen + 1, GFP_KERNEL); + cell->name = kmalloc(1 + namelen + 1, GFP_KERNEL); if (!cell->name) { kfree(cell); return ERR_PTR(-ENOMEM); } - cell->net = net; + cell->name[0] = '.'; + cell->name++; cell->name_len = namelen; for (i = 0; i < namelen; i++) cell->name[i] = tolower(name[i]); cell->name[i] = 0; + cell->net = net; refcount_set(&cell->ref, 1); atomic_set(&cell->active, 0); + INIT_WORK(&cell->destroyer, afs_destroy_cell_work); INIT_WORK(&cell->manager, afs_manage_cell_work); + timer_setup(&cell->management_timer, afs_cell_timer, 0); init_rwsem(&cell->vs_lock); cell->volumes = RB_ROOT; INIT_HLIST_HEAD(&cell->proc_volumes); seqlock_init(&cell->volume_lock); cell->fs_servers = RB_ROOT; - seqlock_init(&cell->fs_lock); + init_rwsem(&cell->fs_lock); rwlock_init(&cell->vl_servers_lock); cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); @@ -201,7 +199,13 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, cell->dns_status = vllist->status; smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ atomic_inc(&net->cells_outstanding); + ret = idr_alloc_cyclic(&net->cells_dyn_ino, cell, + 2, INT_MAX / 2, GFP_KERNEL); + if (ret < 0) + goto error; + cell->dynroot_ino = ret; cell->debug_id = atomic_inc_return(&cell_debug_id); + trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc); _leave(" = %p", cell); @@ -211,7 +215,8 @@ parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); error: - kfree(cell->name); + afs_put_vlserverlist(cell->net, vllist); + kfree(cell->name - 1); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -224,6 +229,7 @@ error: * @namesz: The strlen of the cell name. * @vllist: A colon/comma separated list of numeric IP addresses or NULL. * @excl: T if an error should be given if the cell name already exists. + * @trace: The reason to be logged if the lookup is successful. * * Look up a cell record by name and query the DNS for VL server addresses if * needed. Note that that actual DNS query is punted off to the manager thread @@ -232,7 +238,8 @@ error: */ struct afs_cell *afs_lookup_cell(struct afs_net *net, const char *name, unsigned int namesz, - const char *vllist, bool excl) + const char *vllist, bool excl, + enum afs_cell_trace trace) { struct afs_cell *cell, *candidate, *cursor; struct rb_node *parent, **pp; @@ -242,7 +249,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, _enter("%s,%s", name, vllist); if (!excl) { - cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup); + cell = afs_find_cell(net, name, namesz, trace); if (!IS_ERR(cell)) goto wait_for_cell; } @@ -285,26 +292,28 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, cell = candidate; candidate = NULL; - atomic_set(&cell->active, 2); - trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert); + afs_use_cell(cell, trace); rb_link_node_rcu(&cell->net_node, parent, pp); rb_insert_color(&cell->net_node, &net->cells); up_write(&net->cells_lock); - afs_queue_cell(cell, afs_cell_trace_get_queue_new); + afs_queue_cell(cell, afs_cell_trace_queue_new); wait_for_cell: - trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active), - afs_cell_trace_wait); _debug("wait_for_cell"); - wait_var_event(&cell->state, - ({ - state = smp_load_acquire(&cell->state); /* vs error */ - state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED; - })); + state = smp_load_acquire(&cell->state); /* vs error */ + if (state != AFS_CELL_ACTIVE && + state != AFS_CELL_DEAD) { + afs_see_cell(cell, afs_cell_trace_wait); + wait_var_event(&cell->state, + ({ + state = smp_load_acquire(&cell->state); /* vs error */ + state == AFS_CELL_ACTIVE || state == AFS_CELL_DEAD; + })); + } /* Check the state obtained from the wait check. */ - if (state == AFS_CELL_REMOVED) { + if (state == AFS_CELL_DEAD) { ret = cell->error; goto error; } @@ -318,7 +327,7 @@ cell_already_exists: if (excl) { ret = -EEXIST; } else { - afs_use_cell(cursor, afs_cell_trace_use_lookup); + afs_use_cell(cursor, trace); ret = 0; } up_write(&net->cells_lock); @@ -328,7 +337,7 @@ cell_already_exists: goto wait_for_cell; goto error_noput; error: - afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup); + afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_error); error_noput: _leave(" = %d [error]", ret); return ERR_PTR(ret); @@ -365,8 +374,17 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) len = cp - rootcell; } - /* allocate a cell record for the root cell */ - new_root = afs_lookup_cell(net, rootcell, len, vllist, false); + if (len == 0 || !rootcell[0] || rootcell[0] == '.' || rootcell[len - 1] == '.') + return -EINVAL; + if (memchr(rootcell, '/', len)) + return -EINVAL; + cp = strstr(rootcell, ".."); + if (cp && cp < rootcell + len) + return -EINVAL; + + /* allocate a cell record for the root/workstation cell */ + new_root = afs_lookup_cell(net, rootcell, len, vllist, false, + afs_cell_trace_use_lookup_ws); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); return PTR_ERR(new_root); @@ -377,12 +395,11 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* install the new cell */ down_write(&net->cells_lock); - afs_see_cell(new_root, afs_cell_trace_see_ws); - old_root = net->ws_cell; - net->ws_cell = new_root; + old_root = rcu_replace_pointer(net->ws_cell, new_root, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); - afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); + afs_unuse_cell(old_root, afs_cell_trace_unuse_ws); _leave(" = 0"); return 0; } @@ -500,39 +517,24 @@ static void afs_cell_destroy(struct rcu_head *rcu) trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free); afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers)); - afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias); + afs_unuse_cell(cell->alias_of, afs_cell_trace_unuse_alias); key_put(cell->anonymous_key); - kfree(cell->name); + idr_remove(&net->cells_dyn_ino, cell->dynroot_ino); + kfree(cell->name - 1); kfree(cell); afs_dec_cells_outstanding(net); _leave(" [destroyed]"); } -/* - * Queue the cell manager. - */ -static void afs_queue_cell_manager(struct afs_net *net) -{ - int outstanding = atomic_inc_return(&net->cells_outstanding); - - _enter("%d", outstanding); - - if (!queue_work(afs_wq, &net->cells_manager)) - afs_dec_cells_outstanding(net); -} - -/* - * Cell management timer. We have an increment on cells_outstanding that we - * need to pass along to the work item. - */ -void afs_cells_timer(struct timer_list *timer) +static void afs_destroy_cell_work(struct work_struct *work) { - struct afs_net *net = container_of(timer, struct afs_net, cells_timer); + struct afs_cell *cell = container_of(work, struct afs_cell, destroyer); - _enter(""); - if (!queue_work(afs_wq, &net->cells_manager)) - afs_dec_cells_outstanding(net); + afs_see_cell(cell, afs_cell_trace_destroy); + timer_delete_sync(&cell->management_timer); + cancel_work_sync(&cell->manager); + call_rcu(&cell->rcu, afs_cell_destroy); } /* @@ -564,7 +566,7 @@ void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason) if (zero) { a = atomic_read(&cell->active); WARN(a != 0, "Cell active count %u > 0\n", a); - call_rcu(&cell->rcu, afs_cell_destroy); + WARN_ON(!queue_work(afs_wq, &cell->destroyer)); } } } @@ -576,10 +578,9 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) { int r, a; - r = refcount_read(&cell->ref); - WARN_ON(r == 0); + __refcount_inc(&cell->ref, &r); a = atomic_inc_return(&cell->active); - trace_afs_cell(cell->debug_id, r, a, reason); + trace_afs_cell(cell->debug_id, r + 1, a, reason); return cell; } @@ -587,10 +588,11 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) * Record a cell becoming less active. When the active counter reaches 1, it * is scheduled for destruction, but may get reactivated. */ -void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason) +void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason) { unsigned int debug_id; time64_t now, expire_delay; + bool zero; int r, a; if (!cell) @@ -605,13 +607,15 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr expire_delay = afs_cell_gc_delay; debug_id = cell->debug_id; - r = refcount_read(&cell->ref); a = atomic_dec_return(&cell->active); - trace_afs_cell(debug_id, r, a, reason); - WARN_ON(a == 0); - if (a == 1) + if (!a) /* 'cell' may now be garbage collected. */ - afs_set_cell_timer(net, expire_delay); + afs_set_cell_timer(cell, expire_delay); + + zero = __refcount_dec_and_test(&cell->ref, &r); + trace_afs_cell(debug_id, r - 1, a, reason); + if (zero) + WARN_ON(!queue_work(afs_wq, &cell->destroyer)); } /* @@ -631,9 +635,27 @@ void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason) */ void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason) { - afs_get_cell(cell, reason); - if (!queue_work(afs_wq, &cell->manager)) - afs_put_cell(cell, afs_cell_trace_put_queue_fail); + queue_work(afs_wq, &cell->manager); +} + +/* + * Cell-specific management timer. + */ +static void afs_cell_timer(struct timer_list *timer) +{ + struct afs_cell *cell = container_of(timer, struct afs_cell, management_timer); + + afs_see_cell(cell, afs_cell_trace_see_mgmt_timer); + if (refcount_read(&cell->ref) > 0 && cell->net->live) + queue_work(afs_wq, &cell->manager); +} + +/* + * Set/reduce the cell timer. + */ +void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs) +{ + timer_reduce(&cell->management_timer, jiffies + delay_secs * HZ); } /* @@ -695,7 +717,6 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) if (cell->proc_link.next) cell->proc_link.next->pprev = &cell->proc_link.next; - afs_dynroot_mkdir(net, cell); mutex_unlock(&net->proc_cells_lock); return 0; } @@ -710,242 +731,164 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) afs_proc_cell_remove(cell); mutex_lock(&net->proc_cells_lock); - hlist_del_rcu(&cell->proc_link); - afs_dynroot_rmdir(net, cell); + if (!hlist_unhashed(&cell->proc_link)) + hlist_del_rcu(&cell->proc_link); mutex_unlock(&net->proc_cells_lock); _leave(""); } +static bool afs_has_cell_expired(struct afs_cell *cell, time64_t *_next_manage) +{ + const struct afs_vlserver_list *vllist; + time64_t expire_at = cell->last_inactive; + time64_t now = ktime_get_real_seconds(); + + if (atomic_read(&cell->active)) + return false; + if (!cell->net->live) + return true; + + vllist = rcu_dereference_protected(cell->vl_servers, true); + if (vllist && vllist->nr_servers > 0) + expire_at += afs_cell_gc_delay; + + if (expire_at <= now) + return true; + if (expire_at < *_next_manage) + *_next_manage = expire_at; + return false; +} + /* * Manage a cell record, initialising and destroying it, maintaining its DNS * records. */ -static void afs_manage_cell(struct afs_cell *cell) +static bool afs_manage_cell(struct afs_cell *cell) { struct afs_net *net = cell->net; - int ret, active; + time64_t next_manage = TIME64_MAX; + int ret; _enter("%s", cell->name); -again: _debug("state %u", cell->state); switch (cell->state) { - case AFS_CELL_INACTIVE: - case AFS_CELL_FAILED: - down_write(&net->cells_lock); - active = 1; - if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) { - rb_erase(&cell->net_node, &net->cells); - trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0, - afs_cell_trace_unuse_delete); - smp_store_release(&cell->state, AFS_CELL_REMOVED); - } - up_write(&net->cells_lock); - if (cell->state == AFS_CELL_REMOVED) { - wake_up_var(&cell->state); - goto final_destruction; - } - if (cell->state == AFS_CELL_FAILED) - goto done; - smp_store_release(&cell->state, AFS_CELL_UNSET); - wake_up_var(&cell->state); - goto again; - - case AFS_CELL_UNSET: - smp_store_release(&cell->state, AFS_CELL_ACTIVATING); - wake_up_var(&cell->state); - goto again; - - case AFS_CELL_ACTIVATING: - ret = afs_activate_cell(net, cell); - if (ret < 0) - goto activation_failed; + case AFS_CELL_SETTING_UP: + goto set_up_cell; + case AFS_CELL_ACTIVE: + goto cell_is_active; + case AFS_CELL_REMOVING: + WARN_ON_ONCE(1); + return false; + case AFS_CELL_DEAD: + return false; + default: + _debug("bad state %u", cell->state); + WARN_ON_ONCE(1); /* Unhandled state */ + return false; + } + +set_up_cell: + ret = afs_activate_cell(net, cell); + if (ret < 0) { + cell->error = ret; + goto remove_cell; + } - smp_store_release(&cell->state, AFS_CELL_ACTIVE); - wake_up_var(&cell->state); - goto again; + afs_set_cell_state(cell, AFS_CELL_ACTIVE); - case AFS_CELL_ACTIVE: - if (atomic_read(&cell->active) > 1) { - if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { - ret = afs_update_cell(cell); - if (ret < 0) - cell->error = ret; - } - goto done; - } - smp_store_release(&cell->state, AFS_CELL_DEACTIVATING); - wake_up_var(&cell->state); - goto again; +cell_is_active: + if (afs_has_cell_expired(cell, &next_manage)) + goto remove_cell; - case AFS_CELL_DEACTIVATING: - if (atomic_read(&cell->active) > 1) - goto reverse_deactivation; - afs_deactivate_cell(net, cell); - smp_store_release(&cell->state, AFS_CELL_INACTIVE); - wake_up_var(&cell->state); - goto again; + if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { + ret = afs_update_cell(cell); + if (ret < 0) + cell->error = ret; + } - case AFS_CELL_REMOVED: - goto done; + if (next_manage < TIME64_MAX && cell->net->live) { + time64_t now = ktime_get_real_seconds(); - default: - break; + if (next_manage - now <= 0) + afs_queue_cell(cell, afs_cell_trace_queue_again); + else + afs_set_cell_timer(cell, next_manage - now); } - _debug("bad state %u", cell->state); - BUG(); /* Unhandled state */ + _leave(" [done %u]", cell->state); + return false; -activation_failed: - cell->error = ret; - afs_deactivate_cell(net, cell); +remove_cell: + down_write(&net->cells_lock); - smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */ - wake_up_var(&cell->state); - goto again; + if (atomic_read(&cell->active)) { + up_write(&net->cells_lock); + goto cell_is_active; + } -reverse_deactivation: - smp_store_release(&cell->state, AFS_CELL_ACTIVE); - wake_up_var(&cell->state); - _leave(" [deact->act]"); - return; + /* Make sure that the expiring server records are going to see the fact + * that the cell is caput. + */ + afs_set_cell_state(cell, AFS_CELL_REMOVING); -done: - _leave(" [done %u]", cell->state); - return; + afs_deactivate_cell(net, cell); + afs_purge_servers(cell); + + rb_erase(&cell->net_node, &net->cells); + afs_see_cell(cell, afs_cell_trace_unuse_delete); + up_write(&net->cells_lock); -final_destruction: /* The root volume is pinning the cell */ afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root); cell->root_volume = NULL; - afs_put_cell(cell, afs_cell_trace_put_destroy); + + afs_set_cell_state(cell, AFS_CELL_DEAD); + return true; } static void afs_manage_cell_work(struct work_struct *work) { struct afs_cell *cell = container_of(work, struct afs_cell, manager); + bool final_put; - afs_manage_cell(cell); - afs_put_cell(cell, afs_cell_trace_put_queue_work); + afs_see_cell(cell, afs_cell_trace_manage); + final_put = afs_manage_cell(cell); + afs_see_cell(cell, afs_cell_trace_managed); + if (final_put) + afs_put_cell(cell, afs_cell_trace_put_final); } /* - * Manage the records of cells known to a network namespace. This includes - * updating the DNS records and garbage collecting unused cells that were - * automatically added. - * - * Note that constructed cell records may only be removed from net->cells by - * this work item, so it is safe for this work item to stash a cursor pointing - * into the tree and then return to caller (provided it skips cells that are - * still under construction). - * - * Note also that we were given an increment on net->cells_outstanding by - * whoever queued us that we need to deal with before returning. + * Purge in-memory cell database. */ -void afs_manage_cells(struct work_struct *work) +void afs_cell_purge(struct afs_net *net) { - struct afs_net *net = container_of(work, struct afs_net, cells_manager); + struct afs_cell *ws; struct rb_node *cursor; - time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; - bool purging = !net->live; _enter(""); - /* Trawl the cell database looking for cells that have expired from - * lack of use and cells whose DNS results have expired and dispatch - * their managers. - */ - down_read(&net->cells_lock); + down_write(&net->cells_lock); + ws = rcu_replace_pointer(net->ws_cell, NULL, + lockdep_is_held(&net->cells_lock)); + up_write(&net->cells_lock); + afs_unuse_cell(ws, afs_cell_trace_unuse_ws); + _debug("kick cells"); + down_read(&net->cells_lock); for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { - struct afs_cell *cell = - rb_entry(cursor, struct afs_cell, net_node); - unsigned active; - bool sched_cell = false; - - active = atomic_read(&cell->active); - trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), - active, afs_cell_trace_manage); - - ASSERTCMP(active, >=, 1); - - if (purging) { - if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) { - active = atomic_dec_return(&cell->active); - trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), - active, afs_cell_trace_unuse_pin); - } - } + struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node); - if (active == 1) { - struct afs_vlserver_list *vllist; - time64_t expire_at = cell->last_inactive; - - read_lock(&cell->vl_servers_lock); - vllist = rcu_dereference_protected( - cell->vl_servers, - lockdep_is_held(&cell->vl_servers_lock)); - if (vllist->nr_servers > 0) - expire_at += afs_cell_gc_delay; - read_unlock(&cell->vl_servers_lock); - if (purging || expire_at <= now) - sched_cell = true; - else if (expire_at < next_manage) - next_manage = expire_at; - } + afs_see_cell(cell, afs_cell_trace_purge); - if (!purging) { - if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) - sched_cell = true; - } + if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) + afs_unuse_cell(cell, afs_cell_trace_unuse_pin); - if (sched_cell) - afs_queue_cell(cell, afs_cell_trace_get_queue_manage); + afs_queue_cell(cell, afs_cell_trace_queue_purge); } - up_read(&net->cells_lock); - /* Update the timer on the way out. We have to pass an increment on - * cells_outstanding in the namespace that we are in to the timer or - * the work scheduler. - */ - if (!purging && next_manage < TIME64_MAX) { - now = ktime_get_real_seconds(); - - if (next_manage - now <= 0) { - if (queue_work(afs_wq, &net->cells_manager)) - atomic_inc(&net->cells_outstanding); - } else { - afs_set_cell_timer(net, next_manage - now); - } - } - - afs_dec_cells_outstanding(net); - _leave(" [%d]", atomic_read(&net->cells_outstanding)); -} - -/* - * Purge in-memory cell database. - */ -void afs_cell_purge(struct afs_net *net) -{ - struct afs_cell *ws; - - _enter(""); - - down_write(&net->cells_lock); - ws = net->ws_cell; - net->ws_cell = NULL; - up_write(&net->cells_lock); - afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); - - _debug("del timer"); - if (del_timer_sync(&net->cells_timer)) - atomic_dec(&net->cells_outstanding); - - _debug("kick mgr"); - afs_queue_cell_manager(net); - _debug("wait"); wait_var_event(&net->cells_outstanding, !atomic_read(&net->cells_outstanding)); diff --git a/fs/afs/cm_security.c b/fs/afs/cm_security.c new file mode 100644 index 000000000000..edcbd249d202 --- /dev/null +++ b/fs/afs/cm_security.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Cache manager security. + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <crypto/krb5.h> +#include "internal.h" +#include "afs_cm.h" +#include "afs_fs.h" +#include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> + +#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c +#define xdr_round_up(x) (round_up((x), sizeof(__be32))) +#define xdr_len_object(x) (4 + round_up((x), sizeof(__be32))) + +#ifdef CONFIG_RXGK +static int afs_create_yfs_cm_token(struct sk_buff *challenge, + struct afs_server *server); +#endif + +/* + * Respond to an RxGK challenge, adding appdata. + */ +static int afs_respond_to_challenge(struct sk_buff *challenge) +{ +#ifdef CONFIG_RXGK + struct krb5_buffer appdata = {}; + struct afs_server *server; +#endif + struct rxrpc_peer *peer; + unsigned long peer_data; + u16 service_id; + u8 security_index; + + rxrpc_kernel_query_challenge(challenge, &peer, &peer_data, + &service_id, &security_index); + + _enter("%u,%u", service_id, security_index); + + switch (service_id) { + /* We don't send CM_SERVICE RPCs, so don't expect a challenge + * therefrom. + */ + case FS_SERVICE: + case VL_SERVICE: + case YFS_FS_SERVICE: + case YFS_VL_SERVICE: + break; + default: + pr_warn("Can't respond to unknown challenge %u:%u", + service_id, security_index); + return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO, + afs_abort_unsupported_sec_class); + } + + switch (security_index) { +#ifdef CONFIG_RXKAD + case RXRPC_SECURITY_RXKAD: + return rxkad_kernel_respond_to_challenge(challenge); +#endif + +#ifdef CONFIG_RXGK + case RXRPC_SECURITY_RXGK: + return rxgk_kernel_respond_to_challenge(challenge, &appdata); + + case RXRPC_SECURITY_YFS_RXGK: + switch (service_id) { + case FS_SERVICE: + case YFS_FS_SERVICE: + server = (struct afs_server *)peer_data; + if (!server->cm_rxgk_appdata.data) { + mutex_lock(&server->cm_token_lock); + if (!server->cm_rxgk_appdata.data) + afs_create_yfs_cm_token(challenge, server); + mutex_unlock(&server->cm_token_lock); + } + if (server->cm_rxgk_appdata.data) + appdata = server->cm_rxgk_appdata; + break; + } + return rxgk_kernel_respond_to_challenge(challenge, &appdata); +#endif + + default: + return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO, + afs_abort_unsupported_sec_class); + } +} + +/* + * Process the OOB message queue, processing challenge packets. + */ +void afs_process_oob_queue(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, rx_oob_work); + struct sk_buff *oob; + enum rxrpc_oob_type type; + + while ((oob = rxrpc_kernel_dequeue_oob(net->socket, &type))) { + switch (type) { + case RXRPC_OOB_CHALLENGE: + afs_respond_to_challenge(oob); + break; + } + rxrpc_kernel_free_oob(oob); + } +} + +#ifdef CONFIG_RXGK +/* + * Create a securities keyring for the cache manager and attach a key to it for + * the RxGK tokens we want to use to secure the callback connection back from + * the fileserver. + */ +int afs_create_token_key(struct afs_net *net, struct socket *socket) +{ + const struct krb5_enctype *krb5; + struct key *ring; + key_ref_t key; + char K0[32], *desc; + int ret; + + ring = keyring_alloc("kafs", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), + KEY_POS_SEARCH | KEY_POS_WRITE | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH, + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(ring)) + return PTR_ERR(ring); + + ret = rxrpc_sock_set_security_keyring(socket->sk, ring); + if (ret < 0) + goto out; + + ret = -ENOPKG; + krb5 = crypto_krb5_find_enctype(KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96); + if (!krb5) + goto out; + + if (WARN_ON_ONCE(krb5->key_len > sizeof(K0))) + goto out; + + ret = -ENOMEM; + desc = kasprintf(GFP_KERNEL, "%u:%u:%u:%u", + YFS_CM_SERVICE, RXRPC_SECURITY_YFS_RXGK, 1, krb5->etype); + if (!desc) + goto out; + + wait_for_random_bytes(); + get_random_bytes(K0, krb5->key_len); + + key = key_create(make_key_ref(ring, true), + "rxrpc_s", desc, + K0, krb5->key_len, + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW, + KEY_ALLOC_NOT_IN_QUOTA); + kfree(desc); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto out; + } + + net->fs_cm_token_key = key_ref_to_ptr(key); + ret = 0; +out: + key_put(ring); + return ret; +} + +/* + * Create an YFS RxGK GSS token to use as a ticket to the specified fileserver. + */ +static int afs_create_yfs_cm_token(struct sk_buff *challenge, + struct afs_server *server) +{ + const struct krb5_enctype *conn_krb5, *token_krb5; + const struct krb5_buffer *token_key; + struct crypto_aead *aead; + struct scatterlist sg; + struct afs_net *net = server->cell->net; + const struct key *key = net->fs_cm_token_key; + size_t keysize, uuidsize, authsize, toksize, encsize, contsize, adatasize, offset; + __be32 caps[1] = { + [0] = htonl(AFS_CAP_ERROR_TRANSLATION), + }; + __be32 *xdr; + void *appdata, *K0, *encbase; + u32 enctype; + int ret; + + if (!key) + return -ENOKEY; + + /* Assume that the fileserver is happy to use the same encoding type as + * we were told to use by the token obtained by the user. + */ + enctype = rxgk_kernel_query_challenge(challenge); + + conn_krb5 = crypto_krb5_find_enctype(enctype); + if (!conn_krb5) + return -ENOPKG; + token_krb5 = key->payload.data[0]; + token_key = (const struct krb5_buffer *)&key->payload.data[2]; + + /* struct rxgk_key { + * afs_uint32 enctype; + * opaque key<>; + * }; + */ + keysize = 4 + xdr_len_object(conn_krb5->key_len); + + /* struct RXGK_AuthName { + * afs_int32 kind; + * opaque data<AUTHDATAMAX>; + * opaque display<AUTHPRINTABLEMAX>; + * }; + */ + uuidsize = sizeof(server->uuid); + authsize = 4 + xdr_len_object(uuidsize) + xdr_len_object(0); + + /* struct RXGK_Token { + * rxgk_key K0; + * RXGK_Level level; + * rxgkTime starttime; + * afs_int32 lifetime; + * afs_int32 bytelife; + * rxgkTime expirationtime; + * struct RXGK_AuthName identities<>; + * }; + */ + toksize = keysize + 8 + 4 + 4 + 8 + xdr_len_object(authsize); + + offset = 0; + encsize = crypto_krb5_how_much_buffer(token_krb5, KRB5_ENCRYPT_MODE, toksize, &offset); + + /* struct RXGK_TokenContainer { + * afs_int32 kvno; + * afs_int32 enctype; + * opaque encrypted_token<>; + * }; + */ + contsize = 4 + 4 + xdr_len_object(encsize); + + /* struct YFSAppData { + * opr_uuid initiatorUuid; + * opr_uuid acceptorUuid; + * Capabilities caps; + * afs_int32 enctype; + * opaque callbackKey<>; + * opaque callbackToken<>; + * }; + */ + adatasize = 16 + 16 + + xdr_len_object(sizeof(caps)) + + 4 + + xdr_len_object(conn_krb5->key_len) + + xdr_len_object(contsize); + + ret = -ENOMEM; + appdata = kzalloc(adatasize, GFP_KERNEL); + if (!appdata) + goto out; + xdr = appdata; + + memcpy(xdr, &net->uuid, 16); /* appdata.initiatorUuid */ + xdr += 16 / 4; + memcpy(xdr, &server->uuid, 16); /* appdata.acceptorUuid */ + xdr += 16 / 4; + *xdr++ = htonl(ARRAY_SIZE(caps)); /* appdata.caps.len */ + memcpy(xdr, &caps, sizeof(caps)); /* appdata.caps */ + xdr += ARRAY_SIZE(caps); + *xdr++ = htonl(conn_krb5->etype); /* appdata.enctype */ + + *xdr++ = htonl(conn_krb5->key_len); /* appdata.callbackKey.len */ + K0 = xdr; + get_random_bytes(K0, conn_krb5->key_len); /* appdata.callbackKey.data */ + xdr += xdr_round_up(conn_krb5->key_len) / 4; + + *xdr++ = htonl(contsize); /* appdata.callbackToken.len */ + *xdr++ = htonl(1); /* cont.kvno */ + *xdr++ = htonl(token_krb5->etype); /* cont.enctype */ + *xdr++ = htonl(encsize); /* cont.encrypted_token.len */ + + encbase = xdr; + xdr += offset / 4; + *xdr++ = htonl(conn_krb5->etype); /* token.K0.enctype */ + *xdr++ = htonl(conn_krb5->key_len); /* token.K0.key.len */ + memcpy(xdr, K0, conn_krb5->key_len); /* token.K0.key.data */ + xdr += xdr_round_up(conn_krb5->key_len) / 4; + + *xdr++ = htonl(RXRPC_SECURITY_ENCRYPT); /* token.level */ + *xdr++ = htonl(0); /* token.starttime */ + *xdr++ = htonl(0); /* " */ + *xdr++ = htonl(0); /* token.lifetime */ + *xdr++ = htonl(0); /* token.bytelife */ + *xdr++ = htonl(0); /* token.expirationtime */ + *xdr++ = htonl(0); /* " */ + *xdr++ = htonl(1); /* token.identities.count */ + *xdr++ = htonl(0); /* token.identities[0].kind */ + *xdr++ = htonl(uuidsize); /* token.identities[0].data.len */ + memcpy(xdr, &server->uuid, uuidsize); + xdr += xdr_round_up(uuidsize) / 4; + *xdr++ = htonl(0); /* token.identities[0].display.len */ + + xdr = encbase + xdr_round_up(encsize); + + if ((unsigned long)xdr - (unsigned long)appdata != adatasize) + pr_err("Appdata size incorrect %lx != %zx\n", + (unsigned long)xdr - (unsigned long)appdata, adatasize); + + aead = crypto_krb5_prepare_encryption(token_krb5, token_key, RXGK_SERVER_ENC_TOKEN, + GFP_KERNEL); + if (IS_ERR(aead)) { + ret = PTR_ERR(aead); + goto out_token; + } + + sg_init_one(&sg, encbase, encsize); + ret = crypto_krb5_encrypt(token_krb5, aead, &sg, 1, encsize, offset, toksize, false); + if (ret < 0) + goto out_aead; + + server->cm_rxgk_appdata.len = adatasize; + server->cm_rxgk_appdata.data = appdata; + appdata = NULL; + +out_aead: + crypto_free_aead(aead); +out_token: + kfree(appdata); +out: + return ret; +} +#endif /* CONFIG_RXGK */ diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 99a3f20bc786..1a906805a9e3 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -139,49 +139,6 @@ bool afs_cm_incoming_call(struct afs_call *call) } /* - * Find the server record by peer address and record a probe to the cache - * manager from a server. - */ -static int afs_find_cm_server_by_peer(struct afs_call *call) -{ - struct sockaddr_rxrpc srx; - struct afs_server *server; - struct rxrpc_peer *peer; - - peer = rxrpc_kernel_get_call_peer(call->net->socket, call->rxcall); - - server = afs_find_server(call->net, peer); - if (!server) { - trace_afs_cm_no_server(call, &srx); - return 0; - } - - call->server = server; - return 0; -} - -/* - * Find the server record by server UUID and record a probe to the cache - * manager from a server. - */ -static int afs_find_cm_server_by_uuid(struct afs_call *call, - struct afs_uuid *uuid) -{ - struct afs_server *server; - - rcu_read_lock(); - server = afs_find_server_by_uuid(call->net, call->request); - rcu_read_unlock(); - if (!server) { - trace_afs_cm_no_server_u(call, call->request); - return 0; - } - - call->server = server; - return 0; -} - -/* * Clean up a cache manager call. */ static void afs_cm_destructor(struct afs_call *call) @@ -322,10 +279,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - - /* we'll need the file server record as that tells us which set of - * vnodes to operate upon */ - return afs_find_cm_server_by_peer(call); + return 0; } /* @@ -349,18 +303,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) */ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { - int ret; - _enter(""); afs_extract_discard(call, 0); - ret = afs_extract_data(call, false); - if (ret < 0) - return ret; - - /* we'll need the file server record as that tells us which set of - * vnodes to operate upon */ - return afs_find_cm_server_by_peer(call); + return afs_extract_data(call, false); } /* @@ -373,8 +319,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) __be32 *b; int ret; - _enter(""); - _enter("{%u}", call->unmarshall); switch (call->unmarshall) { @@ -421,9 +365,13 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - /* we'll need the file server record as that tells us which set of - * vnodes to operate upon */ - return afs_find_cm_server_by_uuid(call, call->request); + if (memcmp(call->request, &call->server->_uuid, sizeof(call->server->_uuid)) != 0) { + pr_notice("Callback UUID does not match fileserver UUID\n"); + trace_afs_cm_no_server_u(call, call->request); + return 0; + } + + return 0; } /* @@ -455,7 +403,7 @@ static int afs_deliver_cb_probe(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - return afs_find_cm_server_by_peer(call); + return 0; } /* @@ -533,7 +481,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - return afs_find_cm_server_by_peer(call); + return 0; } /* @@ -593,7 +541,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - return afs_find_cm_server_by_peer(call); + return 0; } /* @@ -667,9 +615,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - - /* We'll need the file server record as that tells us which set of - * vnodes to operate upon. - */ - return afs_find_cm_server_by_peer(call); + return 0; } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 67afe68972d5..bfb69e066672 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -12,6 +12,8 @@ #include <linux/swap.h> #include <linux/ctype.h> #include <linux/sched.h> +#include <linux/iversion.h> +#include <linux/iov_iter.h> #include <linux/task_io_accounting_ops.h> #include "internal.h" #include "afs_fs.h" @@ -21,7 +23,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); +static int afs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_iput(struct dentry *dentry, struct inode *inode); static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, @@ -30,8 +33,8 @@ static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nl loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl); -static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode); +static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode); static int afs_rmdir(struct inode *dir, struct dentry *dentry); static int afs_unlink(struct inode *dir, struct dentry *dentry); static int afs_link(struct dentry *from, struct inode *dir, @@ -41,15 +44,6 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); -static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags); -static void afs_dir_invalidate_folio(struct folio *folio, size_t offset, - size_t length); - -static bool afs_dir_dirty_folio(struct address_space *mapping, - struct folio *folio) -{ - BUG(); /* This should never happen. */ -} const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, @@ -74,10 +68,7 @@ const struct inode_operations afs_dir_inode_operations = { }; const struct address_space_operations afs_dir_aops = { - .dirty_folio = afs_dir_dirty_folio, - .release_folio = afs_dir_release_folio, - .invalidate_folio = afs_dir_invalidate_folio, - .migrate_folio = filemap_migrate_folio, + .writepages = afs_single_writepages, }; const struct dentry_operations afs_fs_dentry_operations = { @@ -98,152 +89,124 @@ struct afs_lookup_one_cookie { struct afs_lookup_cookie { struct dir_context ctx; struct qstr name; - bool found; - bool one_only; unsigned short nr_fids; struct afs_fid fids[50]; }; +static void afs_dir_unuse_cookie(struct afs_vnode *dvnode, int ret) +{ + if (ret == 0) { + struct afs_vnode_cache_aux aux; + loff_t i_size = i_size_read(&dvnode->netfs.inode); + + afs_set_cache_aux(dvnode, &aux); + fscache_unuse_cookie(afs_vnode_cache(dvnode), &aux, &i_size); + } else { + fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL); + } +} + /* - * Drop the refs that we're holding on the folios we were reading into. We've - * got refs on the first nr_pages pages. + * Iterate through a kmapped directory segment, dumping a summary of + * the contents. */ -static void afs_dir_read_cleanup(struct afs_read *req) +static size_t afs_dir_dump_step(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2) { - struct address_space *mapping = req->vnode->netfs.inode.i_mapping; - struct folio *folio; - pgoff_t last = req->nr_pages - 1; + do { + union afs_xdr_dir_block *block = iter_base; - XA_STATE(xas, &mapping->i_pages, 0); + pr_warn("[%05zx] %32phN\n", progress, block); + iter_base += AFS_DIR_BLOCK_SIZE; + progress += AFS_DIR_BLOCK_SIZE; + len -= AFS_DIR_BLOCK_SIZE; + } while (len > 0); - if (unlikely(!req->nr_pages)) - return; + return len; +} - rcu_read_lock(); - xas_for_each(&xas, folio, last) { - if (xas_retry(&xas, folio)) - continue; - BUG_ON(xa_is_value(folio)); - ASSERTCMP(folio->mapping, ==, mapping); +/* + * Dump the contents of a directory. + */ +static void afs_dir_dump(struct afs_vnode *dvnode) +{ + struct iov_iter iter; + unsigned long long i_size = i_size_read(&dvnode->netfs.inode); - folio_put(folio); - } + pr_warn("DIR %llx:%llx is=%llx\n", + dvnode->fid.vid, dvnode->fid.vnode, i_size); - rcu_read_unlock(); + iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size); + iterate_folioq(&iter, iov_iter_count(&iter), NULL, NULL, + afs_dir_dump_step); } /* * check that a directory folio is valid */ -static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio, - loff_t i_size) +static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress, + union afs_xdr_dir_block *block) { - union afs_xdr_dir_block *block; - size_t offset, size; - loff_t pos; + if (block->hdr.magic != AFS_DIR_MAGIC) { + pr_warn("%s(%lx): [%zx] bad magic %04x\n", + __func__, dvnode->netfs.inode.i_ino, + progress, ntohs(block->hdr.magic)); + trace_afs_dir_check_failed(dvnode, progress); + trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic); + return false; + } - /* Determine how many magic numbers there should be in this folio, but - * we must take care because the directory may change size under us. + /* Make sure each block is NUL terminated so we can reasonably + * use string functions on it. The filenames in the folio + * *should* be NUL-terminated anyway. */ - pos = folio_pos(folio); - if (i_size <= pos) - goto checked; - - size = min_t(loff_t, folio_size(folio), i_size - pos); - for (offset = 0; offset < size; offset += sizeof(*block)) { - block = kmap_local_folio(folio, offset); - if (block->hdr.magic != AFS_DIR_MAGIC) { - printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n", - __func__, dvnode->netfs.inode.i_ino, - pos, offset, size, ntohs(block->hdr.magic)); - trace_afs_dir_check_failed(dvnode, pos + offset, i_size); - kunmap_local(block); - trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic); - goto error; - } - - /* Make sure each block is NUL terminated so we can reasonably - * use string functions on it. The filenames in the folio - * *should* be NUL-terminated anyway. - */ - ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0; - - kunmap_local(block); - } -checked: + ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0; afs_stat_v(dvnode, n_read_dir); return true; - -error: - return false; } /* - * Dump the contents of a directory. + * Iterate through a kmapped directory segment, checking the content. */ -static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req) +static size_t afs_dir_check_step(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2) { - union afs_xdr_dir_block *block; - struct address_space *mapping = dvnode->netfs.inode.i_mapping; - struct folio *folio; - pgoff_t last = req->nr_pages - 1; - size_t offset, size; - - XA_STATE(xas, &mapping->i_pages, 0); - - pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n", - dvnode->fid.vid, dvnode->fid.vnode, - req->file_size, req->len, req->actual_len); - pr_warn("DIR %llx %x %zx %zx\n", - req->pos, req->nr_pages, - req->iter->iov_offset, iov_iter_count(req->iter)); - - xas_for_each(&xas, folio, last) { - if (xas_retry(&xas, folio)) - continue; + struct afs_vnode *dvnode = priv; - BUG_ON(folio->mapping != mapping); + if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE || + len % AFS_DIR_BLOCK_SIZE)) + return len; - size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio)); - for (offset = 0; offset < size; offset += sizeof(*block)) { - block = kmap_local_folio(folio, offset); - pr_warn("[%02lx] %32phN\n", folio->index + offset, block); - kunmap_local(block); - } - } + do { + if (!afs_dir_check_block(dvnode, progress, iter_base)) + break; + iter_base += AFS_DIR_BLOCK_SIZE; + len -= AFS_DIR_BLOCK_SIZE; + } while (len > 0); + + return len; } /* - * Check all the blocks in a directory. All the folios are held pinned. + * Check all the blocks in a directory. */ -static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req) +static int afs_dir_check(struct afs_vnode *dvnode) { - struct address_space *mapping = dvnode->netfs.inode.i_mapping; - struct folio *folio; - pgoff_t last = req->nr_pages - 1; - int ret = 0; - - XA_STATE(xas, &mapping->i_pages, 0); + struct iov_iter iter; + unsigned long long i_size = i_size_read(&dvnode->netfs.inode); + size_t checked = 0; - if (unlikely(!req->nr_pages)) + if (unlikely(!i_size)) return 0; - rcu_read_lock(); - xas_for_each(&xas, folio, last) { - if (xas_retry(&xas, folio)) - continue; - - BUG_ON(folio->mapping != mapping); - - if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) { - afs_dir_dump(dvnode, req); - ret = -EIO; - break; - } + iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size); + checked = iterate_folioq(&iter, iov_iter_count(&iter), dvnode, NULL, + afs_dir_check_step); + if (checked != i_size) { + afs_dir_dump(dvnode); + return -EIO; } - - rcu_read_unlock(); - return ret; + return 0; } /* @@ -263,134 +226,140 @@ static int afs_dir_open(struct inode *inode, struct file *file) } /* - * Read the directory into the pagecache in one go, scrubbing the previous - * contents. The list of folios is returned, pinning them so that they don't - * get reclaimed during the iteration. + * Read a file in a single download. */ -static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) - __acquires(&dvnode->validate_lock) +static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file) { - struct address_space *mapping = dvnode->netfs.inode.i_mapping; - struct afs_read *req; + struct iov_iter iter; + ssize_t ret; loff_t i_size; - int nr_pages, i; - int ret; - loff_t remote_size = 0; - - _enter(""); - - req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); + bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) && + !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags)); - refcount_set(&req->usage, 1); - req->vnode = dvnode; - req->key = key_get(key); - req->cleanup = afs_dir_read_cleanup; - -expand: i_size = i_size_read(&dvnode->netfs.inode); - if (i_size < remote_size) - i_size = remote_size; - if (i_size < 2048) { - ret = afs_bad(dvnode, afs_file_error_dir_small); - goto error; - } - if (i_size > 2048 * 1024) { - trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); - ret = -EFBIG; - goto error; + if (is_dir) { + if (i_size < AFS_DIR_BLOCK_SIZE) + return afs_bad(dvnode, afs_file_error_dir_small); + if (i_size > AFS_DIR_BLOCK_SIZE * 1024) { + trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); + return -EFBIG; + } + } else { + if (i_size > AFSPATHMAX) { + trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); + return -EFBIG; + } } - _enter("%llu", i_size); + /* Expand the storage. TODO: Shrink the storage too. */ + if (dvnode->directory_size < i_size) { + size_t cur_size = dvnode->directory_size; - nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE; + ret = netfs_alloc_folioq_buffer(NULL, + &dvnode->directory, &cur_size, i_size, + mapping_gfp_mask(dvnode->netfs.inode.i_mapping)); + dvnode->directory_size = cur_size; + if (ret < 0) + return ret; + } - req->actual_len = i_size; /* May change */ - req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ - req->data_version = dvnode->status.data_version; /* May change */ - iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages, - 0, i_size); - req->iter = &req->def_iter; + iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, dvnode->directory_size); - /* Fill in any gaps that we might find where the memory reclaimer has - * been at work and pin all the folios. If there are any gaps, we will - * need to reread the entire directory contents. + /* AFS requires us to perform the read of a directory synchronously as + * a single unit to avoid issues with the directory contents being + * changed between reads. */ - i = req->nr_pages; - while (i < nr_pages) { - struct folio *folio; - - folio = filemap_get_folio(mapping, i); - if (IS_ERR(folio)) { - if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_stat_v(dvnode, n_inval); - folio = __filemap_get_folio(mapping, - i, FGP_LOCK | FGP_CREAT, - mapping->gfp_mask); - if (IS_ERR(folio)) { - ret = PTR_ERR(folio); - goto error; - } - folio_attach_private(folio, (void *)1); - folio_unlock(folio); + ret = netfs_read_single(&dvnode->netfs.inode, file, &iter); + if (ret >= 0) { + i_size = i_size_read(&dvnode->netfs.inode); + if (i_size > ret) { + /* The content has grown, so we need to expand the + * buffer. + */ + ret = -ESTALE; + } else if (is_dir) { + int ret2 = afs_dir_check(dvnode); + + if (ret2 < 0) + ret = ret2; + } else if (i_size < folioq_folio_size(dvnode->directory, 0)) { + /* NUL-terminate a symlink. */ + char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0); + + symlink[i_size] = 0; + kunmap_local(symlink); } - - req->nr_pages += folio_nr_pages(folio); - i += folio_nr_pages(folio); } - /* If we're going to reload, we need to lock all the pages to prevent - * races. - */ + return ret; +} + +ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file) +{ + ssize_t ret; + + fscache_use_cookie(afs_vnode_cache(dvnode), false); + ret = afs_do_read_single(dvnode, file); + fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL); + return ret; +} + +/* + * Read the directory into a folio_queue buffer in one go, scrubbing the + * previous contents. We return -ESTALE if the caller needs to call us again. + */ +ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file) + __acquires(&dvnode->validate_lock) +{ + ssize_t ret; + loff_t i_size; + + i_size = i_size_read(&dvnode->netfs.inode); + ret = -ERESTARTSYS; if (down_read_killable(&dvnode->validate_lock) < 0) goto error; - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - goto success; + /* We only need to reread the data if it became invalid - or if we + * haven't read it yet. + */ + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) { + ret = i_size; + goto valid; + } up_read(&dvnode->validate_lock); if (down_write_killable(&dvnode->validate_lock) < 0) goto error; - if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { - trace_afs_reload_dir(dvnode); - ret = afs_fetch_data(dvnode, req); - if (ret < 0) - goto error_unlock; - - task_io_account_read(PAGE_SIZE * req->nr_pages); - - if (req->len < req->file_size) { - /* The content has grown, so we need to expand the - * buffer. - */ - up_write(&dvnode->validate_lock); - remote_size = req->file_size; - goto expand; - } + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_invalidate_cache(dvnode, 0); - /* Validate the data we just read. */ - ret = afs_dir_check(dvnode, req); + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) || + !test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) { + trace_afs_reload_dir(dvnode); + ret = afs_read_single(dvnode, file); if (ret < 0) goto error_unlock; // TODO: Trim excess pages set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); + set_bit(AFS_VNODE_DIR_READ, &dvnode->flags); + } else { + ret = i_size; } downgrade_write(&dvnode->validate_lock); -success: - return req; +valid: + return ret; error_unlock: up_write(&dvnode->validate_lock); error: - afs_put_read(req); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = %zd", ret); + return ret; } /* @@ -398,79 +367,69 @@ error: */ static int afs_dir_iterate_block(struct afs_vnode *dvnode, struct dir_context *ctx, - union afs_xdr_dir_block *block, - unsigned blkoff) + union afs_xdr_dir_block *block) { union afs_xdr_dirent *dire; - unsigned offset, next, curr, nr_slots; + unsigned int blknum, base, hdr, pos, next, nr_slots; size_t nlen; int tmp; - _enter("%llx,%x", ctx->pos, blkoff); + blknum = ctx->pos / AFS_DIR_BLOCK_SIZE; + base = blknum * AFS_DIR_SLOTS_PER_BLOCK; + hdr = (blknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + pos = DIV_ROUND_UP(ctx->pos, AFS_DIR_DIRENT_SIZE) - base; - curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent); + _enter("%llx,%x", ctx->pos, blknum); /* walk through the block, an entry at a time */ - for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); - offset < AFS_DIR_SLOTS_PER_BLOCK; - offset = next - ) { + for (unsigned int slot = hdr; slot < AFS_DIR_SLOTS_PER_BLOCK; slot = next) { /* skip entries marked unused in the bitmap */ - if (!(block->hdr.bitmap[offset / 8] & - (1 << (offset % 8)))) { - _debug("ENT[%zu.%u]: unused", - blkoff / sizeof(union afs_xdr_dir_block), offset); - next = offset + 1; - if (offset >= curr) - ctx->pos = blkoff + - next * sizeof(union afs_xdr_dirent); + if (!(block->hdr.bitmap[slot / 8] & + (1 << (slot % 8)))) { + _debug("ENT[%x]: Unused", base + slot); + next = slot + 1; + if (next >= pos) + ctx->pos = (base + next) * sizeof(union afs_xdr_dirent); continue; } /* got a valid entry */ - dire = &block->dirents[offset]; + dire = &block->dirents[slot]; nlen = strnlen(dire->u.name, - sizeof(*block) - - offset * sizeof(union afs_xdr_dirent)); + (unsigned long)(block + 1) - (unsigned long)dire->u.name - 1); if (nlen > AFSNAMEMAX - 1) { - _debug("ENT[%zu]: name too long (len %u/%zu)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, nlen); + _debug("ENT[%x]: Name too long (len %zx)", + base + slot, nlen); return afs_bad(dvnode, afs_file_error_dir_name_too_long); } - _debug("ENT[%zu.%u]: %s %zu \"%s\"", - blkoff / sizeof(union afs_xdr_dir_block), offset, - (offset < curr ? "skip" : "fill"), + _debug("ENT[%x]: %s %zx \"%s\"", + base + slot, (slot < pos ? "skip" : "fill"), nlen, dire->u.name); nr_slots = afs_dir_calc_slots(nlen); - next = offset + nr_slots; + next = slot + nr_slots; if (next > AFS_DIR_SLOTS_PER_BLOCK) { - _debug("ENT[%zu.%u]:" - " %u extends beyond end dir block" - " (len %zu)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, next, nlen); + _debug("ENT[%x]: extends beyond end dir block (len %zx)", + base + slot, nlen); return afs_bad(dvnode, afs_file_error_dir_over_end); } /* Check that the name-extension dirents are all allocated */ for (tmp = 1; tmp < nr_slots; tmp++) { - unsigned int ix = offset + tmp; - if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) { - _debug("ENT[%zu.u]:" - " %u unmarked extension (%u/%u)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, tmp, nr_slots); + unsigned int xslot = slot + tmp; + + if (!(block->hdr.bitmap[xslot / 8] & (1 << (xslot % 8)))) { + _debug("ENT[%x]: Unmarked extension (%x/%x)", + base + slot, tmp, nr_slots); return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } } /* skip if starts before the current position */ - if (offset < curr) { - if (next > curr) - ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); + if (slot < pos) { + if (next > pos) + ctx->pos = (base + next) * sizeof(union afs_xdr_dirent); continue; } @@ -484,75 +443,110 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, return 0; } - ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); + ctx->pos = (base + next) * sizeof(union afs_xdr_dirent); } _leave(" = 1 [more]"); return 1; } +struct afs_dir_iteration_ctx { + struct dir_context *dir_ctx; + int error; +}; + /* - * iterate through the data blob that lists the contents of an AFS directory + * Iterate through a kmapped directory segment. */ -static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, - struct key *key, afs_dataversion_t *_dir_version) +static size_t afs_dir_iterate_step(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2) { - struct afs_vnode *dvnode = AFS_FS_I(dir); - union afs_xdr_dir_block *dblock; - struct afs_read *req; - struct folio *folio; - unsigned offset, size; + struct afs_dir_iteration_ctx *ctx = priv2; + struct afs_vnode *dvnode = priv; int ret; - _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos); - - if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { - _leave(" = -ESTALE"); - return -ESTALE; + if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE || + len % AFS_DIR_BLOCK_SIZE)) { + pr_err("Mis-iteration prog=%zx len=%zx\n", + progress % AFS_DIR_BLOCK_SIZE, + len % AFS_DIR_BLOCK_SIZE); + return len; } - req = afs_read_dir(dvnode, key); - if (IS_ERR(req)) - return PTR_ERR(req); - *_dir_version = req->data_version; + do { + ret = afs_dir_iterate_block(dvnode, ctx->dir_ctx, iter_base); + if (ret != 1) + break; - /* round the file position up to the next entry boundary */ - ctx->pos += sizeof(union afs_xdr_dirent) - 1; - ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1); + ctx->dir_ctx->pos = round_up(ctx->dir_ctx->pos, AFS_DIR_BLOCK_SIZE); + iter_base += AFS_DIR_BLOCK_SIZE; + len -= AFS_DIR_BLOCK_SIZE; + } while (len > 0); - /* walk through the blocks in sequence */ - ret = 0; - while (ctx->pos < req->actual_len) { - /* Fetch the appropriate folio from the directory and re-add it - * to the LRU. We have all the pages pinned with an extra ref. - */ - folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE, - FGP_ACCESSED, 0); - if (IS_ERR(folio)) { - ret = afs_bad(dvnode, afs_file_error_dir_missing_page); - break; - } + return len; +} - offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio); - size = min_t(loff_t, folio_size(folio), - req->actual_len - folio_file_pos(folio)); +/* + * Iterate through the directory folios. + */ +static int afs_dir_iterate_contents(struct inode *dir, struct dir_context *dir_ctx) +{ + struct afs_dir_iteration_ctx ctx = { .dir_ctx = dir_ctx }; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct iov_iter iter; + unsigned long long i_size = i_size_read(dir); - do { - dblock = kmap_local_folio(folio, offset); - ret = afs_dir_iterate_block(dvnode, ctx, dblock, - folio_file_pos(folio) + offset); - kunmap_local(dblock); - if (ret != 1) - goto out; + /* Round the file position up to the next entry boundary */ + dir_ctx->pos = round_up(dir_ctx->pos, sizeof(union afs_xdr_dirent)); - } while (offset += sizeof(*dblock), offset < size); + if (i_size <= 0 || dir_ctx->pos >= i_size) + return 0; - ret = 0; - } + iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size); + iov_iter_advance(&iter, round_down(dir_ctx->pos, AFS_DIR_BLOCK_SIZE)); + + iterate_folioq(&iter, iov_iter_count(&iter), dvnode, &ctx, + afs_dir_iterate_step); + + if (ctx.error == -ESTALE) + afs_invalidate_dir(dvnode, afs_dir_invalid_iter_stale); + return ctx.error; +} + +/* + * iterate through the data blob that lists the contents of an AFS directory + */ +static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, + struct file *file, afs_dataversion_t *_dir_version) +{ + struct afs_vnode *dvnode = AFS_FS_I(dir); + int retry_limit = 100; + int ret; + + _enter("{%lu},%llx,,", dir->i_ino, ctx->pos); + + do { + if (--retry_limit < 0) { + pr_warn("afs_read_dir(): Too many retries\n"); + ret = -ESTALE; + break; + } + ret = afs_read_dir(dvnode, file); + if (ret < 0) { + if (ret != -ESTALE) + break; + if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { + ret = -ESTALE; + break; + } + continue; + } + *_dir_version = inode_peek_iversion_raw(dir); + + ret = afs_dir_iterate_contents(dir, ctx); + up_read(&dvnode->validate_lock); + } while (ret == -ESTALE); -out: - up_read(&dvnode->validate_lock); - afs_put_read(req); _leave(" = %d", ret); return ret; } @@ -564,8 +558,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) { afs_dataversion_t dir_version; - return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file), - &dir_version); + return afs_dir_iterate(file_inode(file), ctx, file, &dir_version); } /* @@ -605,22 +598,22 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, * Do a lookup of a single name in a directory * - just returns the FID the dentry name maps to if found */ -static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, - struct afs_fid *fid, struct key *key, +static int afs_do_lookup_one(struct inode *dir, const struct qstr *name, + struct afs_fid *fid, afs_dataversion_t *_dir_version) { struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_lookup_one_cookie cookie = { .ctx.actor = afs_lookup_one_filldir, - .name = dentry->d_name, + .name = *name, .fid.vid = as->volume->vid }; int ret; - _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name); /* search the directory */ - ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version); + ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version); if (ret < 0) { _leave(" = %d [iter]", ret); return ret; @@ -655,19 +648,10 @@ static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); - if (cookie->found) { - if (cookie->nr_fids < 50) { - cookie->fids[cookie->nr_fids].vnode = ino; - cookie->fids[cookie->nr_fids].unique = dtype; - cookie->nr_fids++; - } - } else if (cookie->name.len == nlen && - memcmp(cookie->name.name, name, nlen) == 0) { - cookie->fids[1].vnode = ino; - cookie->fids[1].unique = dtype; - cookie->found = 1; - if (cookie->one_only) - return false; + if (cookie->nr_fids < 50) { + cookie->fids[cookie->nr_fids].vnode = ino; + cookie->fids[cookie->nr_fids].unique = dtype; + cookie->nr_fids++; } return cookie->nr_fids < 50; @@ -787,8 +771,7 @@ static bool afs_server_supports_ibulk(struct afs_vnode *dvnode) * files in one go and create inodes for them. The inode of the file we were * asked for is returned. */ -static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, - struct key *key) +static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry) { struct afs_lookup_cookie *cookie; struct afs_vnode_param *vp; @@ -796,6 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct inode *inode = NULL, *ti; afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version); + bool supports_ibulk; long ret; int i; @@ -812,19 +796,19 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want * and slot 0 for the directory */ - if (!afs_server_supports_ibulk(dvnode)) - cookie->one_only = true; - - /* search the directory */ - ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version); + /* Search the directory for the named entry using the hash table... */ + ret = afs_dir_search(dvnode, &dentry->d_name, &cookie->fids[1], &data_version); if (ret < 0) goto out; - dentry->d_fsdata = (void *)(unsigned long)data_version; + supports_ibulk = afs_server_supports_ibulk(dvnode); + if (supports_ibulk) { + /* ...then scan linearly from that point for entries to lookup-ahead. */ + cookie->ctx.pos = (ret + 1) * AFS_DIR_DIRENT_SIZE; + afs_dir_iterate(dir, &cookie->ctx, NULL, &data_version); + } - ret = -ENOENT; - if (!cookie->found) - goto out; + dentry->d_fsdata = (void *)(unsigned long)data_version; /* Check to see if we already have an inode for the primary fid. */ inode = ilookup5(dir->i_sb, cookie->fids[1].vnode, @@ -883,7 +867,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, * the whole operation. */ afs_op_set_error(op, -ENOTSUPP); - if (!cookie->one_only) { + if (supports_ibulk) { op->ops = &afs_inline_bulk_status_operation; afs_begin_vnode_operation(op); afs_wait_for_operation(op); @@ -925,8 +909,7 @@ out: /* * Look up an entry in a directory with @sys substitution. */ -static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry, - struct key *key) +static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry) { struct afs_sysnames *subs; struct afs_net *net = afs_i2net(dir); @@ -960,7 +943,7 @@ static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry, } strcpy(p, name); - ret = lookup_one_len(buf, dentry->d_parent, len); + ret = lookup_noperm(&QSTR(buf), dentry->d_parent); if (IS_ERR(ret) || d_is_positive(ret)) goto out_s; dput(ret); @@ -974,7 +957,6 @@ out_s: afs_put_sysnames(subs); kfree(buf); out_p: - key_put(key); return ret; } @@ -988,7 +970,6 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, struct afs_fid fid = {}; struct inode *inode; struct dentry *d; - struct key *key; int ret; _enter("{%llx:%llu},%p{%pd},", @@ -1006,15 +987,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ESTALE); } - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - return ERR_CAST(key); - } - - ret = afs_validate(dvnode, key); + ret = afs_validate(dvnode, NULL); if (ret < 0) { - key_put(key); + afs_dir_unuse_cookie(dvnode, ret); _leave(" = %d [val]", ret); return ERR_PTR(ret); } @@ -1024,15 +999,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_name.name[dentry->d_name.len - 3] == 's' && dentry->d_name.name[dentry->d_name.len - 2] == 'y' && dentry->d_name.name[dentry->d_name.len - 1] == 's') - return afs_lookup_atsys(dir, dentry, key); + return afs_lookup_atsys(dir, dentry); afs_stat_v(dvnode, n_lookup); - inode = afs_do_lookup(dir, dentry, key); - key_put(key); + inode = afs_do_lookup(dir, dentry); if (inode == ERR_PTR(-ENOENT)) - inode = afs_try_auto_mntpt(dentry, dir); - - if (!IS_ERR_OR_NULL(inode)) + inode = NULL; + else if (!IS_ERR_OR_NULL(inode)) fid = AFS_FS_I(inode)->fid; _debug("splice %p", dentry->d_inode); @@ -1050,21 +1023,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, /* * Check the validity of a dentry under RCU conditions. */ -static int afs_d_revalidate_rcu(struct dentry *dentry) +static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry) { - struct afs_vnode *dvnode; - struct dentry *parent; - struct inode *dir; long dir_version, de_version; _enter("%p", dentry); - /* Check the parent directory is still valid first. */ - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - dvnode = AFS_FS_I(dir); if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) return -ECHILD; @@ -1092,11 +1056,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) * - NOTE! the hit can be a negative hit too, so we can't assume we have an * inode */ -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode, *dir; + struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir); struct afs_fid fid; - struct dentry *parent; struct inode *inode; struct key *key; afs_dataversion_t dir_version, invalid_before; @@ -1104,7 +1068,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) int ret; if (flags & LOOKUP_RCU) - return afs_d_revalidate_rcu(dentry); + return afs_d_revalidate_rcu(dir, dentry); if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); @@ -1119,14 +1083,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(key)) key = NULL; - /* Hold the parent dentry so we can peer at it */ - parent = dget_parent(dentry); - dir = AFS_FS_I(d_inode(parent)); - /* validate the parent directory */ ret = afs_validate(dir, key); if (ret == -ERESTARTSYS) { - dput(parent); key_put(key); return ret; } @@ -1154,7 +1113,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version); + ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version); switch (ret) { case 0: /* the filename maps to something */ @@ -1198,22 +1157,19 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) goto out_valid; default: - _debug("failed to iterate dir %pd: %d", - parent, ret); + _debug("failed to iterate parent %pd2: %d", dentry, ret); goto not_found; } out_valid: dentry->d_fsdata = (void *)(unsigned long)dir_version; out_valid_noupdate: - dput(parent); key_put(key); _leave(" = 1 [valid]"); return 1; not_found: _debug("dropping dentry %pd2", dentry); - dput(parent); key_put(key); _leave(" = 0 [bad]"); @@ -1281,6 +1237,7 @@ void afs_check_for_remote_deletion(struct afs_operation *op) */ static void afs_vnode_new_inode(struct afs_operation *op) { + struct afs_vnode_param *dvp = &op->file[0]; struct afs_vnode_param *vp = &op->file[1]; struct afs_vnode *vnode; struct inode *inode; @@ -1300,6 +1257,10 @@ static void afs_vnode_new_inode(struct afs_operation *op) vnode = AFS_FS_I(inode); set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + if (S_ISDIR(inode->i_mode)) + afs_mkdir_init_dir(vnode, dvp->vnode); + else if (S_ISLNK(inode->i_mode)) + afs_init_new_symlink(vnode, op); if (!afs_op_error(op)) afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb); d_instantiate(op->dentry, inode); @@ -1316,18 +1277,21 @@ static void afs_create_success(struct afs_operation *op) static void afs_create_edit_dir(struct afs_operation *op) { + struct netfs_cache_resources cres = {}; struct afs_vnode_param *dvp = &op->file[0]; struct afs_vnode_param *vp = &op->file[1]; struct afs_vnode *dvnode = dvp->vnode; _enter("op=%08x", op->debug_id); + fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode)); down_write(&dvnode->validate_lock); if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid, op->create.reason); up_write(&dvnode->validate_lock); + fscache_end_operation(&cres); } static void afs_create_put(struct afs_operation *op) @@ -1350,11 +1314,12 @@ static const struct afs_operation_ops afs_mkdir_operation = { /* * create a directory on an AFS filesystem */ -static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir); + int ret; _enter("{%llx:%llu},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); @@ -1362,9 +1327,11 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, op = afs_alloc_operation(NULL, dvnode->volume); if (IS_ERR(op)) { d_drop(dentry); - return PTR_ERR(op); + return ERR_CAST(op); } + fscache_use_cookie(afs_vnode_cache(dvnode), true); + afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; op->file[0].modification = true; @@ -1374,7 +1341,9 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, op->create.reason = afs_edit_dir_for_mkdir; op->mtime = current_time(dir); op->ops = &afs_mkdir_operation; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + afs_dir_unuse_cookie(dvnode, ret); + return ERR_PTR(ret); } /* @@ -1387,8 +1356,8 @@ static void afs_dir_remove_subdir(struct dentry *dentry) clear_nlink(&vnode->netfs.inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); - atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + afs_clear_cb_promise(vnode, afs_cb_promise_clear_rmdir); + afs_invalidate_dir(vnode, afs_dir_invalid_subdir_removed); } } @@ -1402,18 +1371,21 @@ static void afs_rmdir_success(struct afs_operation *op) static void afs_rmdir_edit_dir(struct afs_operation *op) { + struct netfs_cache_resources cres = {}; struct afs_vnode_param *dvp = &op->file[0]; struct afs_vnode *dvnode = dvp->vnode; _enter("op=%08x", op->debug_id); afs_dir_remove_subdir(op->dentry); + fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode)); down_write(&dvnode->validate_lock); if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) afs_edit_dir_remove(dvnode, &op->dentry->d_name, afs_edit_dir_for_rmdir); up_write(&dvnode->validate_lock); + fscache_end_operation(&cres); } static void afs_rmdir_put(struct afs_operation *op) @@ -1448,6 +1420,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) if (IS_ERR(op)) return PTR_ERR(op); + fscache_use_cookie(afs_vnode_cache(dvnode), true); + afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; op->file[0].modification = true; @@ -1471,10 +1445,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) op->file[1].vnode = vnode; } - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + + /* Not all systems that can host afs servers have ENOTEMPTY. */ + if (ret == -EEXIST) + ret = -ENOTEMPTY; +out: + afs_dir_unuse_cookie(dvnode, ret); + return ret; error: - return afs_put_operation(op); + ret = afs_put_operation(op); + goto out; } /* @@ -1537,16 +1519,19 @@ static void afs_unlink_success(struct afs_operation *op) static void afs_unlink_edit_dir(struct afs_operation *op) { + struct netfs_cache_resources cres = {}; struct afs_vnode_param *dvp = &op->file[0]; struct afs_vnode *dvnode = dvp->vnode; _enter("op=%08x", op->debug_id); + fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode)); down_write(&dvnode->validate_lock); if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) afs_edit_dir_remove(dvnode, &op->dentry->d_name, afs_edit_dir_for_unlink); up_write(&dvnode->validate_lock); + fscache_end_operation(&cres); } static void afs_unlink_put(struct afs_operation *op) @@ -1585,6 +1570,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(op)) return PTR_ERR(op); + fscache_use_cookie(afs_vnode_cache(dvnode), true); + afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; op->file[0].modification = true; @@ -1631,10 +1618,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_wait_for_operation(op); } - return afs_put_operation(op); - error: - return afs_put_operation(op); + ret = afs_put_operation(op); + afs_dir_unuse_cookie(dvnode, ret); + return ret; } static const struct afs_operation_ops afs_create_operation = { @@ -1668,6 +1655,8 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir, goto error; } + fscache_use_cookie(afs_vnode_cache(dvnode), true); + afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; op->file[0].modification = true; @@ -1678,7 +1667,9 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir, op->create.reason = afs_edit_dir_for_create; op->mtime = current_time(dir); op->ops = &afs_create_operation; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + afs_dir_unuse_cookie(dvnode, ret); + return ret; error: d_drop(dentry); @@ -1743,6 +1734,8 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error; } + fscache_use_cookie(afs_vnode_cache(dvnode), true); + ret = afs_validate(vnode, op->key); if (ret < 0) goto error_op; @@ -1758,10 +1751,13 @@ static int afs_link(struct dentry *from, struct inode *dir, op->dentry_2 = from; op->ops = &afs_link_operation; op->create.reason = afs_edit_dir_for_link; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + afs_dir_unuse_cookie(dvnode, ret); + return ret; error_op: afs_put_operation(op); + afs_dir_unuse_cookie(dvnode, ret); error: d_drop(dentry); _leave(" = %d", ret); @@ -1805,6 +1801,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, goto error; } + fscache_use_cookie(afs_vnode_cache(dvnode), true); + afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; @@ -1813,7 +1811,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, op->create.reason = afs_edit_dir_for_symlink; op->create.symlink = content; op->mtime = current_time(dir); - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + afs_dir_unuse_cookie(dvnode, ret); + return ret; error: d_drop(dentry); @@ -1823,6 +1823,8 @@ error: static void afs_rename_success(struct afs_operation *op) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry)); + _enter("op=%08x", op->debug_id); op->ctime = op->file[0].scb.status.mtime_client; @@ -1832,10 +1834,28 @@ static void afs_rename_success(struct afs_operation *op) op->ctime = op->file[1].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[1]); } + + /* If we're moving a subdir between dirs, we need to update + * its DV counter too as the ".." will be altered. + */ + if (S_ISDIR(vnode->netfs.inode.i_mode) && + op->file[0].vnode != op->file[1].vnode) { + u64 new_dv; + + write_seqlock(&vnode->cb_lock); + + new_dv = vnode->status.data_version + 1; + trace_afs_set_dv(vnode, new_dv); + vnode->status.data_version = new_dv; + inode_set_iversion_raw(&vnode->netfs.inode, new_dv); + + write_sequnlock(&vnode->cb_lock); + } } static void afs_rename_edit_dir(struct afs_operation *op) { + struct netfs_cache_resources orig_cres = {}, new_cres = {}; struct afs_vnode_param *orig_dvp = &op->file[0]; struct afs_vnode_param *new_dvp = &op->file[1]; struct afs_vnode *orig_dvnode = orig_dvp->vnode; @@ -1852,6 +1872,10 @@ static void afs_rename_edit_dir(struct afs_operation *op) op->rename.rehash = NULL; } + fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode)); + if (new_dvnode != orig_dvnode) + fscache_begin_write_operation(&new_cres, afs_vnode_cache(new_dvnode)); + down_write(&orig_dvnode->validate_lock); if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta) @@ -1873,6 +1897,12 @@ static void afs_rename_edit_dir(struct afs_operation *op) &vnode->fid, afs_edit_dir_for_rename_2); } + if (S_ISDIR(vnode->netfs.inode.i_mode) && + new_dvnode != orig_dvnode && + test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + afs_edit_dir_update_dotdot(vnode, new_dvnode, + afs_edit_dir_for_rename_sub); + new_inode = d_inode(new_dentry); if (new_inode) { spin_lock(&new_inode->i_lock); @@ -1895,6 +1925,9 @@ static void afs_rename_edit_dir(struct afs_operation *op) d_move(old_dentry, new_dentry); up_write(&new_dvnode->validate_lock); + fscache_end_operation(&orig_cres); + if (new_dvnode != orig_dvnode) + fscache_end_operation(&new_cres); } static void afs_rename_put(struct afs_operation *op) @@ -1947,6 +1980,10 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (IS_ERR(op)) return PTR_ERR(op); + fscache_use_cookie(afs_vnode_cache(orig_dvnode), true); + if (new_dvnode != orig_dvnode) + fscache_use_cookie(afs_vnode_cache(new_dvnode), true); + ret = afs_validate(vnode, op->key); afs_op_set_error(op, ret); if (ret < 0) @@ -2014,47 +2051,43 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, */ d_drop(old_dentry); - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); +out: + afs_dir_unuse_cookie(orig_dvnode, ret); + if (new_dvnode != orig_dvnode) + afs_dir_unuse_cookie(new_dvnode, ret); + return ret; error: - return afs_put_operation(op); -} - -/* - * Release a directory folio and clean up its private state if it's not busy - * - return true if the folio can now be released, false if not - */ -static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags) -{ - struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio)); - - _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index); - - folio_detach_private(folio); - - /* The directory will need reloading. */ - if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_stat_v(dvnode, n_relpg); - return true; + ret = afs_put_operation(op); + goto out; } /* - * Invalidate part or all of a folio. + * Write the file contents to the cache as a single blob. */ -static void afs_dir_invalidate_folio(struct folio *folio, size_t offset, - size_t length) +int afs_single_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio)); - - _enter("{%lu},%zu,%zu", folio->index, offset, length); - - BUG_ON(!folio_test_locked(folio)); + struct afs_vnode *dvnode = AFS_FS_I(mapping->host); + struct iov_iter iter; + bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) && + !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags)); + int ret = 0; - /* The directory will need reloading. */ - if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_stat_v(dvnode, n_inval); + /* Need to lock to prevent the folio queue and folios from being thrown + * away. + */ + down_read(&dvnode->validate_lock); + + if (is_dir ? + test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) : + atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) { + iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, + i_size_read(&dvnode->netfs.inode)); + ret = netfs_writeback_single(mapping, wbc, &iter); + } - /* we clean up only if the entire folio is being invalidated */ - if (offset == 0 && length == folio_size(folio)) - folio_detach_private(folio); + up_read(&dvnode->validate_lock); + return ret; } diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index e2fa577b66fe..60a549f1d9c5 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -10,6 +10,7 @@ #include <linux/namei.h> #include <linux/pagemap.h> #include <linux/iversion.h> +#include <linux/folio_queue.h> #include "internal.h" #include "xdr_fs.h" @@ -105,32 +106,66 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block, } /* - * Get a new directory folio. + * Get a specific block, extending the directory storage to cover it as needed. */ -static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index) +static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block) { - struct address_space *mapping = vnode->netfs.inode.i_mapping; + struct folio_queue *fq; + struct afs_vnode *dvnode = iter->dvnode; struct folio *folio; + size_t blpos = block * AFS_DIR_BLOCK_SIZE; + size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos; + int ret; + + if (dvnode->directory_size < blend) { + size_t cur_size = dvnode->directory_size; + + ret = netfs_alloc_folioq_buffer( + NULL, &dvnode->directory, &cur_size, blend, + mapping_gfp_mask(dvnode->netfs.inode.i_mapping)); + dvnode->directory_size = cur_size; + if (ret < 0) + goto fail; + } - folio = __filemap_get_folio(mapping, index, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - mapping->gfp_mask); - if (IS_ERR(folio)) { - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); - return NULL; + fq = iter->fq; + if (!fq) + fq = dvnode->directory; + + /* Search the folio queue for the folio containing the block... */ + for (; fq; fq = fq->next) { + for (int s = iter->fq_slot; s < folioq_count(fq); s++) { + size_t fsize = folioq_folio_size(fq, s); + + if (blend <= fpos + fsize) { + /* ... and then return the mapped block. */ + folio = folioq_folio(fq, s); + if (WARN_ON_ONCE(folio_pos(folio) != fpos)) + goto fail; + iter->fq = fq; + iter->fq_slot = s; + iter->fpos = fpos; + return kmap_local_folio(folio, blpos - fpos); + } + fpos += fsize; + } + iter->fq_slot = 0; } - if (!folio_test_private(folio)) - folio_attach_private(folio, (void *)1); - return folio; + +fail: + iter->fq = NULL; + iter->fq_slot = 0; + afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block); + return NULL; } /* * Scan a directory block looking for a dirent of the right name. */ -static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name, +static int afs_dir_scan_block(const union afs_xdr_dir_block *block, const struct qstr *name, unsigned int blocknum) { - union afs_xdr_dirent *de; + const union afs_xdr_dirent *de; u64 bitmap; int d, len, n; @@ -209,9 +244,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode, { union afs_xdr_dir_block *meta, *block; union afs_xdr_dirent *de; - struct folio *folio0, *folio; - unsigned int need_slots, nr_blocks, b; - pgoff_t index; + struct afs_dir_iter iter = { .dvnode = vnode }; + unsigned int nr_blocks, b, entry; loff_t i_size; int slot; @@ -220,20 +254,17 @@ void afs_edit_dir_add(struct afs_vnode *vnode, i_size = i_size_read(&vnode->netfs.inode); if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_bad_size); return; } - folio0 = afs_dir_get_folio(vnode, 0); - if (!folio0) { - _leave(" [fgp]"); + meta = afs_dir_get_block(&iter, 0); + if (!meta) return; - } /* Work out how many slots we're going to need. */ - need_slots = afs_dir_calc_slots(name->len); + iter.nr_slots = afs_dir_calc_slots(name->len); - meta = kmap_local_folio(folio0, 0); if (i_size == 0) goto new_directory; nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; @@ -245,22 +276,21 @@ void afs_edit_dir_add(struct afs_vnode *vnode, /* If the directory extended into a new folio, then we need to * tack a new folio on the end. */ - index = b / AFS_DIR_BLOCKS_PER_PAGE; if (nr_blocks >= AFS_DIR_MAX_BLOCKS) - goto error; - if (index >= folio_nr_pages(folio0)) { - folio = afs_dir_get_folio(vnode, index); - if (!folio) - goto error; - } else { - folio = folio0; - } + goto error_too_many_blocks; - block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + /* Lower dir blocks have a counter in the header we can check. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR && + meta->meta.alloc_ctrs[b] < iter.nr_slots) + continue; + + block = afs_dir_get_block(&iter, b); + if (!block) + goto error; /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) - goto invalidated; + goto already_invalidated; _debug("block %u: %2u %3u %u", b, @@ -275,31 +305,23 @@ void afs_edit_dir_add(struct afs_vnode *vnode, afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE); } - /* Only lower dir blocks have a counter in the header. */ - if (b >= AFS_DIR_BLOCKS_WITH_CTR || - meta->meta.alloc_ctrs[b] >= need_slots) { - /* We need to try and find one or more consecutive - * slots to hold the entry. - */ - slot = afs_find_contig_bits(block, need_slots); - if (slot >= 0) { - _debug("slot %u", slot); - goto found_space; - } + /* We need to try and find one or more consecutive slots to + * hold the entry. + */ + slot = afs_find_contig_bits(block, iter.nr_slots); + if (slot >= 0) { + _debug("slot %u", slot); + goto found_space; } kunmap_local(block); - if (folio != folio0) { - folio_unlock(folio); - folio_put(folio); - } } /* There are no spare slots of sufficient size, yet the operation * succeeded. Download the directory again. */ trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_no_slots); goto out_unmap; new_directory: @@ -307,8 +329,7 @@ new_directory: i_size = AFS_DIR_BLOCK_SIZE; afs_set_i_size(vnode, i_size); slot = AFS_DIR_RESV_BLOCKS0; - folio = folio0; - block = kmap_local_folio(folio, 0); + block = afs_dir_get_block(&iter, 0); nr_blocks = 1; b = 0; @@ -326,41 +347,39 @@ found_space: de->u.name[name->len] = 0; /* Adjust the bitmap. */ - afs_set_contig_bits(block, slot, need_slots); - kunmap_local(block); - if (folio != folio0) { - folio_unlock(folio); - folio_put(folio); - } + afs_set_contig_bits(block, slot, iter.nr_slots); /* Adjust the allocation counter. */ if (b < AFS_DIR_BLOCKS_WITH_CTR) - meta->meta.alloc_ctrs[b] -= need_slots; + meta->meta.alloc_ctrs[b] -= iter.nr_slots; + + /* Adjust the hash chain. */ + entry = b * AFS_DIR_SLOTS_PER_BLOCK + slot; + iter.bucket = afs_dir_hash_name(name); + de->u.hash_next = meta->meta.hashtable[iter.bucket]; + meta->meta.hashtable[iter.bucket] = htons(entry); + kunmap_local(block); inode_inc_iversion_raw(&vnode->netfs.inode); afs_stat_v(vnode, n_dir_cr); _debug("Insert %s in %u[%u]", name->name, b, slot); + netfs_single_mark_inode_dirty(&vnode->netfs.inode); + out_unmap: kunmap_local(meta); - folio_unlock(folio0); - folio_put(folio0); _leave(""); return; -invalidated: +already_invalidated: trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); kunmap_local(block); - if (folio != folio0) { - folio_unlock(folio); - folio_put(folio); - } goto out_unmap; +error_too_many_blocks: + afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_too_many_blocks); error: trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); goto out_unmap; } @@ -374,13 +393,14 @@ error: void afs_edit_dir_remove(struct afs_vnode *vnode, struct qstr *name, enum afs_edit_dir_reason why) { - union afs_xdr_dir_block *meta, *block; - union afs_xdr_dirent *de; - struct folio *folio0, *folio; - unsigned int need_slots, nr_blocks, b; - pgoff_t index; + union afs_xdr_dir_block *meta, *block, *pblock; + union afs_xdr_dirent *de, *pde; + struct afs_dir_iter iter = { .dvnode = vnode }; + struct afs_fid fid; + unsigned int b, slot, entry; loff_t i_size; - int slot; + __be16 next; + int found; _enter(",,{%d,%s},", name->len, name->name); @@ -388,81 +408,95 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, if (i_size < AFS_DIR_BLOCK_SIZE || i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_bad_size); return; } - nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; - folio0 = afs_dir_get_folio(vnode, 0); - if (!folio0) { - _leave(" [fgp]"); + if (!afs_dir_init_iter(&iter, name)) return; - } - - /* Work out how many slots we're going to discard. */ - need_slots = afs_dir_calc_slots(name->len); - meta = kmap_local_folio(folio0, 0); - - /* Find a block that has sufficient slots available. Each folio - * contains two or more directory blocks. - */ - for (b = 0; b < nr_blocks; b++) { - index = b / AFS_DIR_BLOCKS_PER_PAGE; - if (index >= folio_nr_pages(folio0)) { - folio = afs_dir_get_folio(vnode, index); - if (!folio) - goto error; - } else { - folio = folio0; - } - - block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); - - /* Abandon the edit if we got a callback break. */ - if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) - goto invalidated; - - if (b > AFS_DIR_BLOCKS_WITH_CTR || - meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) { - slot = afs_dir_scan_block(block, name, b); - if (slot >= 0) - goto found_dirent; - } + meta = afs_dir_find_block(&iter, 0); + if (!meta) + return; - kunmap_local(block); - if (folio != folio0) { - folio_unlock(folio); - folio_put(folio); - } + /* Find the entry in the blob. */ + found = afs_dir_search_bucket(&iter, name, &fid); + if (found < 0) { + /* Didn't find the dirent to clobber. Re-download. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent, + 0, 0, 0, 0, name->name); + afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_wrong_name); + goto out_unmap; } - /* Didn't find the dirent to clobber. Download the directory again. */ - trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent, - 0, 0, 0, 0, name->name); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); - goto out_unmap; + entry = found; + b = entry / AFS_DIR_SLOTS_PER_BLOCK; + slot = entry % AFS_DIR_SLOTS_PER_BLOCK; -found_dirent: + block = afs_dir_find_block(&iter, b); + if (!block) + goto error; + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto already_invalidated; + + /* Check and clear the entry. */ de = &block->dirents[slot]; + if (de->u.valid != 1) + goto error_unmap; trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot, ntohl(de->u.vnode), ntohl(de->u.unique), name->name); - memset(de, 0, sizeof(*de) * need_slots); - /* Adjust the bitmap. */ - afs_clear_contig_bits(block, slot, need_slots); - kunmap_local(block); - if (folio != folio0) { - folio_unlock(folio); - folio_put(folio); - } + afs_clear_contig_bits(block, slot, iter.nr_slots); /* Adjust the allocation counter. */ if (b < AFS_DIR_BLOCKS_WITH_CTR) - meta->meta.alloc_ctrs[b] += need_slots; + meta->meta.alloc_ctrs[b] += iter.nr_slots; + + /* Clear the constituent entries. */ + next = de->u.hash_next; + memset(de, 0, sizeof(*de) * iter.nr_slots); + kunmap_local(block); + + /* Adjust the hash chain: if iter->prev_entry is 0, the hashtable head + * index is previous; otherwise it's slot number of the previous entry. + */ + if (!iter.prev_entry) { + __be16 prev_next = meta->meta.hashtable[iter.bucket]; + + if (unlikely(prev_next != htons(entry))) { + pr_warn("%llx:%llx:%x: not head of chain b=%x p=%x,%x e=%x %*s", + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + iter.bucket, iter.prev_entry, prev_next, entry, + name->len, name->name); + goto error; + } + meta->meta.hashtable[iter.bucket] = next; + } else { + unsigned int pb = iter.prev_entry / AFS_DIR_SLOTS_PER_BLOCK; + unsigned int ps = iter.prev_entry % AFS_DIR_SLOTS_PER_BLOCK; + __be16 prev_next; + + pblock = afs_dir_find_block(&iter, pb); + if (!pblock) + goto error; + pde = &pblock->dirents[ps]; + prev_next = pde->u.hash_next; + if (prev_next != htons(entry)) { + kunmap_local(pblock); + pr_warn("%llx:%llx:%x: not prev in chain b=%x p=%x,%x e=%x %*s", + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + iter.bucket, iter.prev_entry, prev_next, entry, + name->len, name->name); + goto error; + } + pde->u.hash_next = next; + kunmap_local(pblock); + } + + netfs_single_mark_inode_dirty(&vnode->netfs.inode); inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version); afs_stat_v(vnode, n_dir_rm); @@ -470,25 +504,145 @@ found_dirent: out_unmap: kunmap_local(meta); - folio_unlock(folio0); - folio_put(folio0); _leave(""); return; -invalidated: +already_invalidated: + kunmap_local(block); trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval, 0, 0, 0, 0, name->name); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); - kunmap_local(block); - if (folio != folio0) { - folio_unlock(folio); - folio_put(folio); - } goto out_unmap; +error_unmap: + kunmap_local(block); error: trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error, 0, 0, 0, 0, name->name); - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); goto out_unmap; } + +/* + * Edit a subdirectory that has been moved between directories to update the + * ".." entry. + */ +void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode, + enum afs_edit_dir_reason why) +{ + union afs_xdr_dir_block *block; + union afs_xdr_dirent *de; + struct afs_dir_iter iter = { .dvnode = vnode }; + unsigned int nr_blocks, b; + loff_t i_size; + int slot; + + _enter(""); + + i_size = i_size_read(&vnode->netfs.inode); + if (i_size < AFS_DIR_BLOCK_SIZE) { + afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size); + return; + } + + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + /* Find a block that has sufficient slots available. Each folio + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks; b++) { + block = afs_dir_get_block(&iter, b); + if (!block) + goto error; + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto already_invalidated; + + slot = afs_dir_scan_block(block, &dotdot_name, b); + if (slot >= 0) + goto found_dirent; + + kunmap_local(block); + } + + /* Didn't find the dirent to clobber. Download the directory again. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd, + 0, 0, 0, 0, ".."); + afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_no_dd); + goto out; + +found_dirent: + de = &block->dirents[slot]; + de->u.vnode = htonl(new_dvnode->fid.vnode); + de->u.unique = htonl(new_dvnode->fid.unique); + + trace_afs_edit_dir(vnode, why, afs_edit_dir_update_dd, b, slot, + ntohl(de->u.vnode), ntohl(de->u.unique), ".."); + + kunmap_local(block); + netfs_single_mark_inode_dirty(&vnode->netfs.inode); + inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version); + +out: + _leave(""); + return; + +already_invalidated: + kunmap_local(block); + trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval, + 0, 0, 0, 0, ".."); + goto out; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error, + 0, 0, 0, 0, ".."); + goto out; +} + +/* + * Initialise a new directory. We need to fill in the "." and ".." entries. + */ +void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_dvnode) +{ + union afs_xdr_dir_block *meta; + struct afs_dir_iter iter = { .dvnode = dvnode }; + union afs_xdr_dirent *de; + unsigned int slot = AFS_DIR_RESV_BLOCKS0; + loff_t i_size; + + i_size = i_size_read(&dvnode->netfs.inode); + if (i_size != AFS_DIR_BLOCK_SIZE) { + afs_invalidate_dir(dvnode, afs_dir_invalid_edit_add_bad_size); + return; + } + + meta = afs_dir_get_block(&iter, 0); + if (!meta) + return; + + afs_edit_init_block(meta, meta, 0); + + de = &meta->dirents[slot]; + de->u.valid = 1; + de->u.vnode = htonl(dvnode->fid.vnode); + de->u.unique = htonl(dvnode->fid.unique); + memcpy(de->u.name, ".", 2); + trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot, + dvnode->fid.vnode, dvnode->fid.unique, "."); + slot++; + + de = &meta->dirents[slot]; + de->u.valid = 1; + de->u.vnode = htonl(parent_dvnode->fid.vnode); + de->u.unique = htonl(parent_dvnode->fid.unique); + memcpy(de->u.name, "..", 3); + trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot, + parent_dvnode->fid.vnode, parent_dvnode->fid.unique, ".."); + + afs_set_contig_bits(meta, AFS_DIR_RESV_BLOCKS0, 2); + meta->meta.alloc_ctrs[0] -= 2; + kunmap_local(meta); + + netfs_single_mark_inode_dirty(&dvnode->netfs.inode); + set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); + set_bit(AFS_VNODE_DIR_READ, &dvnode->flags); +} diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c new file mode 100644 index 000000000000..b25bd892db4d --- /dev/null +++ b/fs/afs/dir_search.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Search a directory's hash table. + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * https://tools.ietf.org/html/draft-keiser-afs3-directory-object-00 + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/iversion.h> +#include "internal.h" +#include "afs_fs.h" +#include "xdr_fs.h" + +/* + * Calculate the name hash. + */ +unsigned int afs_dir_hash_name(const struct qstr *name) +{ + const unsigned char *p = name->name; + unsigned int hash = 0, i; + int bucket; + + for (i = 0; i < name->len; i++) + hash = (hash * 173) + p[i]; + bucket = hash & (AFS_DIR_HASHTBL_SIZE - 1); + if (hash > INT_MAX) { + bucket = AFS_DIR_HASHTBL_SIZE - bucket; + bucket &= (AFS_DIR_HASHTBL_SIZE - 1); + } + return bucket; +} + +/* + * Reset a directory iterator. + */ +static bool afs_dir_reset_iter(struct afs_dir_iter *iter) +{ + unsigned long long i_size = i_size_read(&iter->dvnode->netfs.inode); + unsigned int nblocks; + + /* Work out the maximum number of steps we can take. */ + nblocks = umin(i_size / AFS_DIR_BLOCK_SIZE, AFS_DIR_MAX_BLOCKS); + if (!nblocks) + return false; + iter->loop_check = nblocks * (AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS); + iter->prev_entry = 0; /* Hash head is previous */ + return true; +} + +/* + * Initialise a directory iterator for looking up a name. + */ +bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name) +{ + iter->nr_slots = afs_dir_calc_slots(name->len); + iter->bucket = afs_dir_hash_name(name); + return afs_dir_reset_iter(iter); +} + +/* + * Get a specific block. + */ +union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block) +{ + struct folio_queue *fq = iter->fq; + struct afs_vnode *dvnode = iter->dvnode; + struct folio *folio; + size_t blpos = block * AFS_DIR_BLOCK_SIZE; + size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos; + int slot = iter->fq_slot; + + _enter("%zx,%d", block, slot); + + if (iter->block) { + kunmap_local(iter->block); + iter->block = NULL; + } + + if (dvnode->directory_size < blend) + goto fail; + + if (!fq || blpos < fpos) { + fq = dvnode->directory; + slot = 0; + fpos = 0; + } + + /* Search the folio queue for the folio containing the block... */ + for (; fq; fq = fq->next) { + for (; slot < folioq_count(fq); slot++) { + size_t fsize = folioq_folio_size(fq, slot); + + if (blend <= fpos + fsize) { + /* ... and then return the mapped block. */ + folio = folioq_folio(fq, slot); + if (WARN_ON_ONCE(folio_pos(folio) != fpos)) + goto fail; + iter->fq = fq; + iter->fq_slot = slot; + iter->fpos = fpos; + iter->block = kmap_local_folio(folio, blpos - fpos); + return iter->block; + } + fpos += fsize; + } + slot = 0; + } + +fail: + iter->fq = NULL; + iter->fq_slot = 0; + afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block); + return NULL; +} + +/* + * Search through a directory bucket. + */ +int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name, + struct afs_fid *_fid) +{ + const union afs_xdr_dir_block *meta; + unsigned int entry; + int ret = -ESTALE; + + meta = afs_dir_find_block(iter, 0); + if (!meta) + return -ESTALE; + + entry = ntohs(meta->meta.hashtable[iter->bucket & (AFS_DIR_HASHTBL_SIZE - 1)]); + _enter("%x,%x", iter->bucket, entry); + + while (entry) { + const union afs_xdr_dir_block *block; + const union afs_xdr_dirent *dire; + unsigned int blnum = entry / AFS_DIR_SLOTS_PER_BLOCK; + unsigned int slot = entry % AFS_DIR_SLOTS_PER_BLOCK; + unsigned int resv = (blnum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + + _debug("search %x", entry); + + if (slot < resv) { + kdebug("slot out of range h=%x rs=%2x sl=%2x-%2x", + iter->bucket, resv, slot, slot + iter->nr_slots - 1); + goto bad; + } + + block = afs_dir_find_block(iter, blnum); + if (!block) + goto bad; + dire = &block->dirents[slot]; + + if (slot + iter->nr_slots <= AFS_DIR_SLOTS_PER_BLOCK && + memcmp(dire->u.name, name->name, name->len) == 0 && + dire->u.name[name->len] == '\0') { + _fid->vnode = ntohl(dire->u.vnode); + _fid->unique = ntohl(dire->u.unique); + ret = entry; + goto found; + } + + iter->prev_entry = entry; + entry = ntohs(dire->u.hash_next); + if (!--iter->loop_check) { + kdebug("dir chain loop h=%x", iter->bucket); + goto bad; + } + } + + ret = -ENOENT; +found: + if (iter->block) { + kunmap_local(iter->block); + iter->block = NULL; + } + +bad: + if (ret == -ESTALE) + afs_invalidate_dir(iter->dvnode, afs_dir_invalid_iter_stale); + _leave(" = %d", ret); + return ret; +} + +/* + * Search the appropriate hash chain in the contents of an AFS directory. + */ +int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name, + struct afs_fid *_fid, afs_dataversion_t *_dir_version) +{ + struct afs_dir_iter iter = { .dvnode = dvnode, }; + int ret, retry_limit = 3; + + _enter("{%lu},,,", dvnode->netfs.inode.i_ino); + + if (!afs_dir_init_iter(&iter, name)) + return -ENOENT; + do { + if (--retry_limit < 0) { + pr_warn("afs_read_dir(): Too many retries\n"); + ret = -ESTALE; + break; + } + ret = afs_read_dir(dvnode, NULL); + if (ret < 0) { + if (ret != -ESTALE) + break; + if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) { + ret = -ESTALE; + break; + } + continue; + } + *_dir_version = inode_peek_iversion_raw(&dvnode->netfs.inode); + + ret = afs_dir_search_bucket(&iter, name, _fid); + up_read(&dvnode->validate_lock); + if (ret == -ESTALE) + afs_dir_reset_iter(&iter); + } while (ret == -ESTALE); + + _leave(" = %d", ret); + return ret; +} diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index a1e581946b93..0b80eb93fa40 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -113,16 +113,14 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode, sdentry = NULL; do { - int slen; - dput(sdentry); sillycounter++; /* Create a silly name. Note that the ".__afs" prefix is * understood by the salvager and must not be changed. */ - slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter); - sdentry = lookup_one_len(silly, dentry->d_parent, slen); + scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter); + sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent); /* N.B. Better to return EBUSY here ... it could be dangerous * to delete the file while it's in use. diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index c4d2711e20ad..8c6130789fde 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -10,16 +10,19 @@ #include <linux/dns_resolver.h> #include "internal.h" -static atomic_t afs_autocell_ino; +#define AFS_MIN_DYNROOT_CELL_INO 4 /* Allow for ., .., @cell, .@cell */ +#define AFS_MAX_DYNROOT_CELL_INO ((unsigned int)INT_MAX) + +static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino); /* * iget5() comparator for inode created by autocell operations - * - * These pseudo inodes don't match anything. */ static int afs_iget5_pseudo_test(struct inode *inode, void *opaque) { - return 0; + struct afs_fid *fid = opaque; + + return inode->i_ino == fid->vnode; } /* @@ -39,28 +42,16 @@ static int afs_iget5_pseudo_set(struct inode *inode, void *opaque) } /* - * Create an inode for a dynamic root directory or an autocell dynamic - * automount dir. + * Create an inode for an autocell dynamic automount dir. */ -struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) +static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino) { - struct afs_super_info *as = AFS_FS_S(sb); struct afs_vnode *vnode; struct inode *inode; - struct afs_fid fid = {}; + struct afs_fid fid = { .vnode = ino, .unique = 1, }; _enter(""); - if (as->volume) - fid.vid = as->volume->vid; - if (root) { - fid.vnode = 1; - fid.unique = 1; - } else { - fid.vnode = atomic_inc_return(&afs_autocell_ino); - fid.unique = 0; - } - inode = iget5_locked(sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) { @@ -73,163 +64,75 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) vnode = AFS_FS_I(inode); - /* there shouldn't be an existing inode */ - BUG_ON(!(inode->i_state & I_NEW)); - - netfs_inode_init(&vnode->netfs, NULL, false); - inode->i_size = 0; - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - if (root) { - inode->i_op = &afs_dynroot_inode_operations; - inode->i_fop = &simple_dir_operations; - } else { - inode->i_op = &afs_autocell_inode_operations; - } - set_nlink(inode, 2); - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - simple_inode_init_ts(inode); - inode->i_blocks = 0; - inode->i_generation = 0; - - set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); - if (!root) { + if (inode->i_state & I_NEW) { + netfs_inode_init(&vnode->netfs, NULL, false); + simple_inode_init_ts(inode); + set_nlink(inode, 2); + inode->i_size = 0; + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_autocell_inode_operations; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_blocks = 0; + inode->i_generation = 0; + inode->i_flags |= S_AUTOMOUNT | S_NOATIME; + + set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - inode->i_flags |= S_AUTOMOUNT; - } - inode->i_flags |= S_NOATIME; - unlock_new_inode(inode); + unlock_new_inode(inode); + } _leave(" = %p", inode); return inode; } /* - * Probe to see if a cell may exist. This prevents positive dentries from - * being created unnecessarily. + * Try to automount the mountpoint with pseudo directory, if the autocell + * option is set. */ -static int afs_probe_cell_name(struct dentry *dentry) +static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry *dentry, + unsigned int flags) { - struct afs_cell *cell; + struct afs_cell *cell = NULL; struct afs_net *net = afs_d2net(dentry); + struct inode *inode = NULL; const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; - char *result = NULL; - int ret; + bool dotted = false; + int ret = -ENOENT; /* Names prefixed with a dot are R/W mounts. */ if (name[0] == '.') { - if (len == 1) - return -EINVAL; name++; len--; + dotted = true; } - cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe); - if (!IS_ERR(cell)) { - afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe); - return 0; - } - - ret = dns_query(net->net, "afsdb", name, len, "srv=1", - &result, NULL, false); - if (ret == -ENODATA || ret == -ENOKEY || ret == 0) - ret = -ENOENT; - if (ret > 0 && ret >= sizeof(struct dns_server_list_v1_header)) { - struct dns_server_list_v1_header *v1 = (void *)result; - - if (v1->hdr.zero == 0 && - v1->hdr.content == DNS_PAYLOAD_IS_SERVER_LIST && - v1->hdr.version == 1 && - (v1->status != DNS_LOOKUP_GOOD && - v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) - return -ENOENT; - + cell = afs_lookup_cell(net, name, len, NULL, false, + afs_cell_trace_use_lookup_dynroot); + if (IS_ERR(cell)) { + ret = PTR_ERR(cell); + goto out_no_cell; } - kfree(result); - return ret; -} - -/* - * Try to auto mount the mountpoint with pseudo directory, if the autocell - * operation is setted. - */ -struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) -{ - struct afs_vnode *vnode = AFS_FS_I(dir); - struct inode *inode; - int ret = -ENOENT; - - _enter("%p{%pd}, {%llx:%llu}", - dentry, dentry, vnode->fid.vid, vnode->fid.vnode); - - if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) - goto out; - - ret = afs_probe_cell_name(dentry); - if (ret < 0) - goto out; - - inode = afs_iget_pseudo_dir(dir->i_sb, false); + inode = afs_iget_pseudo_dir(dir->i_sb, cell->dynroot_ino * 2 + dotted); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out; } - _leave("= %p", inode); - return inode; + dentry->d_fsdata = cell; + return d_splice_alias(inode, dentry); out: - _leave("= %d", ret); + afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_dynroot); +out_no_cell: + if (!inode) + return d_splice_alias(inode, dentry); return ret == -ENOENT ? NULL : ERR_PTR(ret); } /* - * Look up @cell in a dynroot directory. This is a substitution for the - * local cell name for the net namespace. - */ -static struct dentry *afs_lookup_atcell(struct dentry *dentry) -{ - struct afs_cell *cell; - struct afs_net *net = afs_d2net(dentry); - struct dentry *ret; - char *name; - int len; - - if (!net->ws_cell) - return ERR_PTR(-ENOENT); - - ret = ERR_PTR(-ENOMEM); - name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL); - if (!name) - goto out_p; - - down_read(&net->cells_lock); - cell = net->ws_cell; - if (cell) { - len = cell->name_len; - memcpy(name, cell->name, len + 1); - } - up_read(&net->cells_lock); - - ret = ERR_PTR(-ENOENT); - if (!cell) - goto out_n; - - ret = lookup_one_len(name, dentry->d_parent, len); - - /* We don't want to d_add() the @cell dentry here as we don't want to - * the cached dentry to hide changes to the local cell name. - */ - -out_n: - kfree(name); -out_p: - return ret; -} - -/* * Look up an entry in a dynroot directory. */ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, @@ -237,8 +140,6 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr { _enter("%pd", dentry); - ASSERTCMP(d_inode(dentry), ==, NULL); - if (flags & LOOKUP_CREATE) return ERR_PTR(-EOPNOTSUPP); @@ -249,141 +150,256 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr if (dentry->d_name.len == 5 && memcmp(dentry->d_name.name, "@cell", 5) == 0) - return afs_lookup_atcell(dentry); + return afs_lookup_atcell(dir, dentry, 2); + + if (dentry->d_name.len == 6 && + memcmp(dentry->d_name.name, ".@cell", 6) == 0) + return afs_lookup_atcell(dir, dentry, 3); - return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry); + return afs_dynroot_lookup_cell(dir, dentry, flags); } const struct inode_operations afs_dynroot_inode_operations = { .lookup = afs_dynroot_lookup, }; +static void afs_dynroot_d_release(struct dentry *dentry) +{ + struct afs_cell *cell = dentry->d_fsdata; + + afs_unuse_cell(cell, afs_cell_trace_unuse_dynroot_mntpt); +} + +/* + * Keep @cell symlink dentries around, but only keep cell autodirs when they're + * being used. + */ +static int afs_dynroot_delete_dentry(const struct dentry *dentry) +{ + const struct qstr *name = &dentry->d_name; + + if (name->len == 5 && memcmp(name->name, "@cell", 5) == 0) + return 0; + if (name->len == 6 && memcmp(name->name, ".@cell", 6) == 0) + return 0; + return 1; +} + const struct dentry_operations afs_dynroot_dentry_operations = { - .d_delete = always_delete_dentry, - .d_release = afs_d_release, + .d_delete = afs_dynroot_delete_dentry, + .d_release = afs_dynroot_d_release, .d_automount = afs_d_automount, }; +static void afs_atcell_delayed_put_cell(void *arg) +{ + struct afs_cell *cell = arg; + + afs_put_cell(cell, afs_cell_trace_put_atcell); +} + /* - * Create a manually added cell mount directory. - * - The caller must hold net->proc_cells_lock + * Read @cell or .@cell symlinks. */ -int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell) +static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) { - struct super_block *sb = net->dynroot_sb; - struct dentry *root, *subdir; - int ret; + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_cell *cell; + struct afs_net *net = afs_i2net(inode); + const char *name; + bool dotted = vnode->fid.vnode == 3; - if (!sb || atomic_read(&sb->s_active) == 0) - return 0; + if (!rcu_access_pointer(net->ws_cell)) + return ERR_PTR(-ENOENT); - /* Let the ->lookup op do the creation */ - root = sb->s_root; - inode_lock(root->d_inode); - subdir = lookup_one_len(cell->name, root, cell->name_len); - if (IS_ERR(subdir)) { - ret = PTR_ERR(subdir); - goto unlock; + if (!dentry) { + /* We're in RCU-pathwalk. */ + cell = rcu_dereference(net->ws_cell); + if (dotted) + name = cell->name - 1; + else + name = cell->name; + /* Shouldn't need to set a delayed call. */ + return name; } - /* Note that we're retaining an extra ref on the dentry */ - subdir->d_fsdata = (void *)1UL; - ret = 0; -unlock: - inode_unlock(root->d_inode); - return ret; + down_read(&net->cells_lock); + + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); + if (dotted) + name = cell->name - 1; + else + name = cell->name; + afs_get_cell(cell, afs_cell_trace_get_atcell); + set_delayed_call(done, afs_atcell_delayed_put_cell, cell); + + up_read(&net->cells_lock); + return name; } +static const struct inode_operations afs_atcell_inode_operations = { + .get_link = afs_atcell_get_link, +}; + /* - * Remove a manually added cell mount directory. - * - The caller must hold net->proc_cells_lock + * Create an inode for the @cell or .@cell symlinks. */ -void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell) +static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino) { - struct super_block *sb = net->dynroot_sb; - struct dentry *root, *subdir; + struct afs_vnode *vnode; + struct inode *inode; + struct afs_fid fid = { .vnode = ino, .unique = 1, }; - if (!sb || atomic_read(&sb->s_active) == 0) - return; + inode = iget5_locked(dir->i_sb, fid.vnode, + afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); + if (!inode) + return ERR_PTR(-ENOMEM); - root = sb->s_root; - inode_lock(root->d_inode); + vnode = AFS_FS_I(inode); - /* Don't want to trigger a lookup call, which will re-add the cell */ - subdir = try_lookup_one_len(cell->name, root, cell->name_len); - if (IS_ERR_OR_NULL(subdir)) { - _debug("lookup %ld", PTR_ERR(subdir)); - goto no_dentry; + if (inode->i_state & I_NEW) { + netfs_inode_init(&vnode->netfs, NULL, false); + simple_inode_init_ts(inode); + set_nlink(inode, 1); + inode->i_size = 0; + inode->i_mode = S_IFLNK | 0555; + inode->i_op = &afs_atcell_inode_operations; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_blocks = 0; + inode->i_generation = 0; + inode->i_flags |= S_NOATIME; + + unlock_new_inode(inode); } + return d_splice_alias(inode, dentry); +} - _debug("rmdir %pd %u", subdir, d_count(subdir)); +/* + * Transcribe the cell database into readdir content under the RCU read lock. + * Each cell produces two entries, one prefixed with a dot and one not. + */ +static int afs_dynroot_readdir_cells(struct afs_net *net, struct dir_context *ctx) +{ + const struct afs_cell *cell; + loff_t newpos; + + _enter("%llu", ctx->pos); + + for (;;) { + unsigned int ix = ctx->pos >> 1; + + cell = idr_get_next(&net->cells_dyn_ino, &ix); + if (!cell) + return 0; + if (READ_ONCE(cell->state) == AFS_CELL_REMOVING || + READ_ONCE(cell->state) == AFS_CELL_DEAD) { + ctx->pos += 2; + ctx->pos &= ~1; + continue; + } + + newpos = ix << 1; + if (newpos > ctx->pos) + ctx->pos = newpos; - if (subdir->d_fsdata) { - _debug("unpin %u", d_count(subdir)); - subdir->d_fsdata = NULL; - dput(subdir); + _debug("pos %llu -> cell %u", ctx->pos, cell->dynroot_ino); + + if ((ctx->pos & 1) == 0) { + if (!dir_emit(ctx, cell->name, cell->name_len, + cell->dynroot_ino, DT_DIR)) + return 0; + ctx->pos++; + } + if ((ctx->pos & 1) == 1) { + if (!dir_emit(ctx, cell->name - 1, cell->name_len + 1, + cell->dynroot_ino + 1, DT_DIR)) + return 0; + ctx->pos++; + } } - dput(subdir); -no_dentry: - inode_unlock(root->d_inode); - _leave(""); + return 0; } /* - * Populate a newly created dynamic root with cell names. + * Read the AFS dynamic root directory. This produces a list of cellnames, + * dotted and undotted, along with @cell and .@cell links if configured. */ -int afs_dynroot_populate(struct super_block *sb) +static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx) { - struct afs_cell *cell; - struct afs_net *net = afs_sb2net(sb); - int ret; + struct afs_net *net = afs_d2net(file->f_path.dentry); + int ret = 0; - mutex_lock(&net->proc_cells_lock); + if (!dir_emit_dots(file, ctx)) + return 0; - net->dynroot_sb = sb; - hlist_for_each_entry(cell, &net->proc_cells, proc_link) { - ret = afs_dynroot_mkdir(net, cell); - if (ret < 0) - goto error; + if (ctx->pos == 2) { + if (rcu_access_pointer(net->ws_cell) && + !dir_emit(ctx, "@cell", 5, 2, DT_LNK)) + return 0; + ctx->pos = 3; + } + if (ctx->pos == 3) { + if (rcu_access_pointer(net->ws_cell) && + !dir_emit(ctx, ".@cell", 6, 3, DT_LNK)) + return 0; + ctx->pos = 4; } - ret = 0; -out: - mutex_unlock(&net->proc_cells_lock); + if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) { + down_read(&net->cells_lock); + ret = afs_dynroot_readdir_cells(net, ctx); + up_read(&net->cells_lock); + } return ret; - -error: - net->dynroot_sb = NULL; - goto out; } +static const struct file_operations afs_dynroot_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate_shared = afs_dynroot_readdir, + .fsync = noop_fsync, +}; + /* - * When a dynamic root that's in the process of being destroyed, depopulate it - * of pinned directories. + * Create an inode for a dynamic root directory. */ -void afs_dynroot_depopulate(struct super_block *sb) +struct inode *afs_dynroot_iget_root(struct super_block *sb) { - struct afs_net *net = afs_sb2net(sb); - struct dentry *root = sb->s_root, *subdir; - - /* Prevent more subdirs from being created */ - mutex_lock(&net->proc_cells_lock); - if (net->dynroot_sb == sb) - net->dynroot_sb = NULL; - mutex_unlock(&net->proc_cells_lock); - - if (root) { - struct hlist_node *n; - inode_lock(root->d_inode); - - /* Remove all the pins for dirs created for manually added cells */ - hlist_for_each_entry_safe(subdir, n, &root->d_children, d_sib) { - if (subdir->d_fsdata) { - subdir->d_fsdata = NULL; - dput(subdir); - } - } + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_vnode *vnode; + struct inode *inode; + struct afs_fid fid = { .vid = 0, .vnode = 1, .unique = 1,}; + + if (as->volume) + fid.vid = as->volume->vid; - inode_unlock(root->d_inode); + inode = iget5_locked(sb, fid.vnode, + afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); + if (!inode) + return ERR_PTR(-ENOMEM); + + vnode = AFS_FS_I(inode); + + /* there shouldn't be an existing inode */ + if (inode->i_state & I_NEW) { + netfs_inode_init(&vnode->netfs, NULL, false); + simple_inode_init_ts(inode); + set_nlink(inode, 2); + inode->i_size = 0; + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_dynroot_inode_operations; + inode->i_fop = &afs_dynroot_file_operations; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_blocks = 0; + inode->i_generation = 0; + inode->i_flags |= S_NOATIME; + + set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); + unlock_new_inode(inode); } + _leave(" = %p", inode); + return inode; } diff --git a/fs/afs/file.c b/fs/afs/file.c index ef2cc8f565d2..fc15497608c6 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -16,10 +16,10 @@ #include <linux/mm.h> #include <linux/swap.h> #include <linux/netfs.h> +#include <trace/events/netfs.h> #include "internal.h" static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); -static int afs_symlink_read_folio(struct file *file, struct folio *folio); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, @@ -54,20 +54,12 @@ const struct address_space_operations afs_file_aops = { .read_folio = netfs_read_folio, .readahead = netfs_readahead, .dirty_folio = netfs_dirty_folio, - .launder_folio = netfs_launder_folio, .release_folio = netfs_release_folio, .invalidate_folio = netfs_invalidate_folio, .migrate_folio = filemap_migrate_folio, .writepages = afs_writepages, }; -const struct address_space_operations afs_symlink_aops = { - .read_folio = afs_symlink_read_folio, - .release_folio = netfs_release_folio, - .invalidate_folio = netfs_invalidate_folio, - .migrate_folio = filemap_migrate_folio, -}; - static const struct vm_operations_struct afs_vm_ops = { .open = afs_vm_open, .close = afs_vm_close, @@ -208,47 +200,12 @@ int afs_release(struct inode *inode, struct file *file) return ret; } -/* - * Allocate a new read record. - */ -struct afs_read *afs_alloc_read(gfp_t gfp) -{ - struct afs_read *req; - - req = kzalloc(sizeof(struct afs_read), gfp); - if (req) - refcount_set(&req->usage, 1); - - return req; -} - -/* - * Dispose of a ref to a read record. - */ -void afs_put_read(struct afs_read *req) -{ - if (refcount_dec_and_test(&req->usage)) { - if (req->cleanup) - req->cleanup(req); - key_put(req->key); - kfree(req); - } -} - static void afs_fetch_data_notify(struct afs_operation *op) { - struct afs_read *req = op->fetch.req; - struct netfs_io_subrequest *subreq = req->subreq; - int error = afs_op_error(op); - - req->error = error; - if (subreq) { - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); - netfs_subreq_terminated(subreq, error ?: req->actual_len, false); - req->subreq = NULL; - } else if (req->done) { - req->done(req); - } + struct netfs_io_subrequest *subreq = op->fetch.subreq; + + subreq->error = afs_op_error(op); + netfs_read_subreq_terminated(subreq); } static void afs_fetch_data_success(struct afs_operation *op) @@ -258,103 +215,198 @@ static void afs_fetch_data_success(struct afs_operation *op) _enter("op=%08x", op->debug_id); afs_vnode_commit_status(op, &op->file[0]); afs_stat_v(vnode, n_fetches); - atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes); + atomic_long_add(op->fetch.subreq->transferred, &op->net->n_fetch_bytes); afs_fetch_data_notify(op); } -static void afs_fetch_data_put(struct afs_operation *op) +static void afs_fetch_data_aborted(struct afs_operation *op) { - op->fetch.req->error = afs_op_error(op); - afs_put_read(op->fetch.req); + afs_check_for_remote_deletion(op); + afs_fetch_data_notify(op); } -static const struct afs_operation_ops afs_fetch_data_operation = { +const struct afs_operation_ops afs_fetch_data_operation = { .issue_afs_rpc = afs_fs_fetch_data, .issue_yfs_rpc = yfs_fs_fetch_data, .success = afs_fetch_data_success, - .aborted = afs_check_for_remote_deletion, + .aborted = afs_fetch_data_aborted, .failed = afs_fetch_data_notify, - .put = afs_fetch_data_put, }; +static void afs_issue_read_call(struct afs_operation *op) +{ + op->call_responded = false; + op->call_error = 0; + op->call_abort_code = 0; + if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags)) + yfs_fs_fetch_data(op); + else + afs_fs_fetch_data(op); +} + +static void afs_end_read(struct afs_operation *op) +{ + if (op->call_responded && op->server) + set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags); + + if (!afs_op_error(op)) + afs_fetch_data_success(op); + else if (op->cumul_error.aborted) + afs_fetch_data_aborted(op); + else + afs_fetch_data_notify(op); + + afs_end_vnode_operation(op); + afs_put_operation(op); +} + +/* + * Perform I/O processing on an asynchronous call. The work item carries a ref + * to the call struct that we either need to release or to pass on. + */ +static void afs_read_receive(struct afs_call *call) +{ + struct afs_operation *op = call->op; + enum afs_call_state state; + + _enter(""); + + state = READ_ONCE(call->state); + if (state == AFS_CALL_COMPLETE) + return; + trace_afs_read_recv(op, call); + + while (state < AFS_CALL_COMPLETE && READ_ONCE(call->need_attention)) { + WRITE_ONCE(call->need_attention, false); + afs_deliver_to_call(call); + state = READ_ONCE(call->state); + } + + if (state < AFS_CALL_COMPLETE) { + netfs_read_subreq_progress(op->fetch.subreq); + if (rxrpc_kernel_check_life(call->net->socket, call->rxcall)) + return; + /* rxrpc terminated the call. */ + afs_set_call_complete(call, call->error, call->abort_code); + } + + op->call_abort_code = call->abort_code; + op->call_error = call->error; + op->call_responded = call->responded; + op->call = NULL; + call->op = NULL; + afs_put_call(call); + + /* If the call failed, then we need to crank the server rotation + * handle and try the next. + */ + if (afs_select_fileserver(op)) { + afs_issue_read_call(op); + return; + } + + afs_end_read(op); +} + +void afs_fetch_data_async_rx(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, async_work); + + afs_read_receive(call); + afs_put_call(call); +} + +void afs_fetch_data_immediate_cancel(struct afs_call *call) +{ + if (call->async) { + afs_get_call(call, afs_call_trace_wake); + if (!queue_work(afs_async_calls, &call->async_work)) + afs_deferred_put_call(call); + flush_work(&call->async_work); + } +} + /* * Fetch file data from the volume. */ -int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req) +static void afs_issue_read(struct netfs_io_subrequest *subreq) { struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); + struct key *key = subreq->rreq->netfs_priv; _enter("%s{%llx:%llu.%u},%x,,,", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - key_serial(req->key)); + key_serial(key)); - op = afs_alloc_operation(req->key, vnode->volume); + op = afs_alloc_operation(key, vnode->volume); if (IS_ERR(op)) { - if (req->subreq) - netfs_subreq_terminated(req->subreq, PTR_ERR(op), false); - return PTR_ERR(op); + subreq->error = PTR_ERR(op); + netfs_read_subreq_terminated(subreq); + return; } afs_op_set_vnode(op, 0, vnode); - op->fetch.req = afs_get_read(req); + op->fetch.subreq = subreq; op->ops = &afs_fetch_data_operation; - return afs_do_sync_operation(op); -} -static void afs_issue_read(struct netfs_io_subrequest *subreq) -{ - struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); - struct afs_read *fsreq; - - fsreq = afs_alloc_read(GFP_NOFS); - if (!fsreq) - return netfs_subreq_terminated(subreq, -ENOMEM, false); - - fsreq->subreq = subreq; - fsreq->pos = subreq->start + subreq->transferred; - fsreq->len = subreq->len - subreq->transferred; - fsreq->key = key_get(subreq->rreq->netfs_priv); - fsreq->vnode = vnode; - fsreq->iter = &subreq->io_iter; - - afs_fetch_data(fsreq->vnode, fsreq); - afs_put_read(fsreq); -} + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); -static int afs_symlink_read_folio(struct file *file, struct folio *folio) -{ - struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host); - struct afs_read *fsreq; - int ret; + if (subreq->rreq->origin == NETFS_READAHEAD || + subreq->rreq->iocb) { + op->flags |= AFS_OPERATION_ASYNC; - fsreq = afs_alloc_read(GFP_NOFS); - if (!fsreq) - return -ENOMEM; + if (!afs_begin_vnode_operation(op)) { + subreq->error = afs_put_operation(op); + netfs_read_subreq_terminated(subreq); + return; + } - fsreq->pos = folio_pos(folio); - fsreq->len = folio_size(folio); - fsreq->vnode = vnode; - fsreq->iter = &fsreq->def_iter; - iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages, - fsreq->pos, fsreq->len); + if (!afs_select_fileserver(op)) { + afs_end_read(op); + return; + } - ret = afs_fetch_data(fsreq->vnode, fsreq); - if (ret == 0) - folio_mark_uptodate(folio); - folio_unlock(folio); - return ret; + afs_issue_read_call(op); + } else { + afs_do_sync_operation(op); + } } static int afs_init_request(struct netfs_io_request *rreq, struct file *file) { + struct afs_vnode *vnode = AFS_FS_I(rreq->inode); + if (file) rreq->netfs_priv = key_get(afs_file_key(file)); rreq->rsize = 256 * 1024; - rreq->wsize = 256 * 1024; + rreq->wsize = 256 * 1024 * 1024; + + switch (rreq->origin) { + case NETFS_READ_SINGLE: + if (!file) { + struct key *key = afs_request_key(vnode->volume->cell); + + if (IS_ERR(key)) + return PTR_ERR(key); + rreq->netfs_priv = key; + } + break; + case NETFS_WRITEBACK: + case NETFS_WRITETHROUGH: + case NETFS_UNBUFFERED_WRITE: + case NETFS_DIO_WRITE: + if (S_ISREG(rreq->inode->i_mode)) + rreq->io_streams[0].avail = true; + break; + case NETFS_WRITEBACK_SINGLE: + default: + break; + } return 0; } @@ -369,6 +421,7 @@ static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len, static void afs_free_request(struct netfs_io_request *rreq) { key_put(rreq->netfs_priv); + afs_put_wb_key(rreq->netfs_priv2); } static void afs_update_i_size(struct inode *inode, loff_t new_i_size) @@ -400,7 +453,10 @@ const struct netfs_request_ops afs_req_ops = { .issue_read = afs_issue_read, .update_i_size = afs_update_i_size, .invalidate_cache = afs_netfs_invalidate_cache, - .create_write_requests = afs_create_write_requests, + .begin_writeback = afs_begin_writeback, + .prepare_write = afs_prepare_write, + .issue_write = afs_issue_write, + .retry_request = afs_retry_request, }; static void afs_add_open_mmap(struct afs_vnode *vnode) diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 3546b087e791..8418813ee043 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -49,6 +49,105 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo return op; } +struct afs_io_locker { + struct list_head link; + struct task_struct *task; + unsigned long have_lock; +}; + +/* + * Unlock the I/O lock on a vnode. + */ +static void afs_unlock_for_io(struct afs_vnode *vnode) +{ + struct afs_io_locker *locker; + + spin_lock(&vnode->lock); + locker = list_first_entry_or_null(&vnode->io_lock_waiters, + struct afs_io_locker, link); + if (locker) { + list_del(&locker->link); + smp_store_release(&locker->have_lock, 1); /* The unlock barrier. */ + smp_mb__after_atomic(); /* Store have_lock before task state */ + wake_up_process(locker->task); + } else { + clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags); + } + spin_unlock(&vnode->lock); +} + +/* + * Lock the I/O lock on a vnode uninterruptibly. We can't use an ordinary + * mutex as lockdep will complain if we unlock it in the wrong thread. + */ +static void afs_lock_for_io(struct afs_vnode *vnode) +{ + struct afs_io_locker myself = { .task = current, }; + + spin_lock(&vnode->lock); + + if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) { + spin_unlock(&vnode->lock); + return; + } + + list_add_tail(&myself.link, &vnode->io_lock_waiters); + spin_unlock(&vnode->lock); + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (smp_load_acquire(&myself.have_lock)) /* The lock barrier */ + break; + schedule(); + } + __set_current_state(TASK_RUNNING); +} + +/* + * Lock the I/O lock on a vnode interruptibly. We can't use an ordinary mutex + * as lockdep will complain if we unlock it in the wrong thread. + */ +static int afs_lock_for_io_interruptible(struct afs_vnode *vnode) +{ + struct afs_io_locker myself = { .task = current, }; + int ret = 0; + + spin_lock(&vnode->lock); + + if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) { + spin_unlock(&vnode->lock); + return 0; + } + + list_add_tail(&myself.link, &vnode->io_lock_waiters); + spin_unlock(&vnode->lock); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (smp_load_acquire(&myself.have_lock) || /* The lock barrier */ + signal_pending(current)) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + + /* If we got a signal, try to transfer the lock onto the next + * waiter. + */ + if (unlikely(signal_pending(current))) { + spin_lock(&vnode->lock); + if (myself.have_lock) { + spin_unlock(&vnode->lock); + afs_unlock_for_io(vnode); + } else { + list_del(&myself.link); + spin_unlock(&vnode->lock); + } + ret = -ERESTARTSYS; + } + return ret; +} + /* * Lock the vnode(s) being operated upon. */ @@ -60,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op) _enter(""); if (op->flags & AFS_OPERATION_UNINTR) { - mutex_lock(&vnode->io_lock); + afs_lock_for_io(vnode); op->flags |= AFS_OPERATION_LOCK_0; _leave(" = t [1]"); return true; @@ -72,7 +171,7 @@ static bool afs_get_io_locks(struct afs_operation *op) if (vnode2 > vnode) swap(vnode, vnode2); - if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + if (afs_lock_for_io_interruptible(vnode) < 0) { afs_op_set_error(op, -ERESTARTSYS); op->flags |= AFS_OPERATION_STOP; _leave(" = f [I 0]"); @@ -81,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op) op->flags |= AFS_OPERATION_LOCK_0; if (vnode2) { - if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) { + if (afs_lock_for_io_interruptible(vnode2) < 0) { afs_op_set_error(op, -ERESTARTSYS); op->flags |= AFS_OPERATION_STOP; - mutex_unlock(&vnode->io_lock); + afs_unlock_for_io(vnode); op->flags &= ~AFS_OPERATION_LOCK_0; _leave(" = f [I 1]"); return false; @@ -104,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op) _enter(""); if (op->flags & AFS_OPERATION_LOCK_1) - mutex_unlock(&vnode2->io_lock); + afs_unlock_for_io(vnode2); if (op->flags & AFS_OPERATION_LOCK_0) - mutex_unlock(&vnode->io_lock); + afs_unlock_for_io(vnode); } static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp, @@ -157,7 +256,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op) /* * Tidy up a filesystem cursor and unlock the vnode. */ -static void afs_end_vnode_operation(struct afs_operation *op) +void afs_end_vnode_operation(struct afs_operation *op) { _enter(""); @@ -201,7 +300,7 @@ void afs_wait_for_operation(struct afs_operation *op) } } - if (op->call_responded) + if (op->call_responded && op->server) set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags); if (!afs_op_error(op)) { diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 580de4adaaf6..e0030ac74ea0 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -235,20 +235,20 @@ out: * Probe all of a fileserver's addresses to find out the best route and to * query its capabilities. */ -void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, - struct afs_addr_list *new_alist, struct key *key) +int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, + struct afs_addr_list *new_alist, struct key *key) { struct afs_endpoint_state *estate, *old; - struct afs_addr_list *alist; + struct afs_addr_list *old_alist = NULL, *alist; unsigned long unprobed; _enter("%pU", &server->uuid); estate = kzalloc(sizeof(*estate), GFP_KERNEL); if (!estate) - return; + return -ENOMEM; - refcount_set(&estate->ref, 1); + refcount_set(&estate->ref, 2); estate->server_id = server->debug_id; estate->rtt = UINT_MAX; @@ -256,21 +256,31 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, old = rcu_dereference_protected(server->endpoint_state, lockdep_is_held(&server->fs_lock)); - estate->responsive_set = old->responsive_set; - estate->addresses = afs_get_addrlist(new_alist ?: old->addresses, - afs_alist_trace_get_estate); + if (old) { + estate->responsive_set = old->responsive_set; + if (!new_alist) + new_alist = old->addresses; + } + + if (old_alist != new_alist) + afs_set_peer_appdata(server, old_alist, new_alist); + + estate->addresses = afs_get_addrlist(new_alist, afs_alist_trace_get_estate); alist = estate->addresses; estate->probe_seq = ++server->probe_counter; atomic_set(&estate->nr_probing, alist->nr_addrs); + if (new_alist) + server->addr_version = new_alist->version; rcu_assign_pointer(server->endpoint_state, estate); - set_bit(AFS_ESTATE_SUPERSEDED, &old->flags); write_unlock(&server->fs_lock); + if (old) + set_bit(AFS_ESTATE_SUPERSEDED, &old->flags); trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref), afs_estate_trace_alloc_probe); - afs_get_address_preferences(net, alist); + afs_get_address_preferences(net, new_alist); server->probed_at = jiffies; unprobed = (1UL << alist->nr_addrs) - 1; @@ -293,6 +303,8 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, } afs_put_endpoint_state(old, afs_estate_trace_put_probe); + afs_put_endpoint_state(estate, afs_estate_trace_put_probe); + return 0; } /* @@ -506,10 +518,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta finish_wait(&server->probe_wq, &wait); dont_wait: - if (estate->responsive_set & ~exclude) - return 1; if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) return 0; + if (estate->responsive_set & ~exclude) + return 1; if (is_intr && signal_pending(current)) return -ERESTARTSYS; if (timo == 0) @@ -522,6 +534,6 @@ dont_wait: */ void afs_fs_probe_cleanup(struct afs_net *net) { - if (del_timer_sync(&net->fs_probe_timer)) + if (timer_delete_sync(&net->fs_probe_timer)) afs_dec_servers_outstanding(net); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 79cd30775b7a..bc9556991d7c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -301,18 +301,19 @@ void afs_fs_fetch_status(struct afs_operation *op) static int afs_deliver_fs_fetch_data(struct afs_call *call) { struct afs_operation *op = call->op; + struct netfs_io_subrequest *subreq = op->fetch.subreq; struct afs_vnode_param *vp = &op->file[0]; - struct afs_read *req = op->fetch.req; const __be32 *bp; + size_t count_before; int ret; _enter("{%u,%zu,%zu/%llu}", call->unmarshall, call->iov_len, iov_iter_count(call->iter), - req->actual_len); + call->remaining); switch (call->unmarshall) { case 0: - req->actual_len = 0; + call->remaining = 0; call->unmarshall++; if (call->operation_ID == FSFETCHDATA64) { afs_extract_to_tmp64(call); @@ -322,8 +323,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) } fallthrough; - /* Extract the returned data length into - * ->actual_len. This may indicate more or less data than was + /* Extract the returned data length into ->remaining. + * This may indicate more or less data than was * requested will be returned. */ case 1: @@ -332,38 +333,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (ret < 0) return ret; - req->actual_len = be64_to_cpu(call->tmp64); - _debug("DATA length: %llu", req->actual_len); + call->remaining = be64_to_cpu(call->tmp64); + _debug("DATA length: %llu", call->remaining); - if (req->actual_len == 0) + if (call->remaining == 0) goto no_more_data; - call->iter = req->iter; - call->iov_len = min(req->actual_len, req->len); + call->iter = &subreq->io_iter; + call->iov_len = umin(call->remaining, subreq->len - subreq->transferred); call->unmarshall++; fallthrough; /* extract the returned data */ case 2: - _debug("extract data %zu/%llu", - iov_iter_count(call->iter), req->actual_len); + count_before = call->iov_len; + _debug("extract data %zu/%llu", count_before, call->remaining); ret = afs_extract_data(call, true); + subreq->transferred += count_before - call->iov_len; + call->remaining -= count_before - call->iov_len; if (ret < 0) return ret; call->iter = &call->def_iter; - if (req->actual_len <= req->len) + if (call->remaining) goto no_more_data; /* Discard any excess data the server gave us */ - afs_extract_discard(call, req->actual_len - req->len); + afs_extract_discard(call, call->remaining); call->unmarshall = 3; fallthrough; case 3: _debug("extract discard %zu/%llu", - iov_iter_count(call->iter), req->actual_len - req->len); + iov_iter_count(call->iter), call->remaining); ret = afs_extract_data(call, true); if (ret < 0) @@ -385,8 +388,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) xdr_decode_AFSCallBack(&bp, call, &vp->scb); xdr_decode_AFSVolSync(&bp, &op->volsync); - req->data_version = vp->scb.status.data_version; - req->file_size = vp->scb.status.size; + if (subreq->start + subreq->transferred >= vp->scb.status.size) + __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); call->unmarshall++; fallthrough; @@ -405,14 +408,18 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) static const struct afs_call_type afs_RXFSFetchData = { .name = "FS.FetchData", .op = afs_FS_FetchData, + .async_rx = afs_fetch_data_async_rx, .deliver = afs_deliver_fs_fetch_data, + .immediate_cancel = afs_fetch_data_immediate_cancel, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSFetchData64 = { .name = "FS.FetchData64", .op = afs_FS_FetchData64, + .async_rx = afs_fetch_data_async_rx, .deliver = afs_deliver_fs_fetch_data, + .immediate_cancel = afs_fetch_data_immediate_cancel, .destructor = afs_flat_call_destructor, }; @@ -421,8 +428,8 @@ static const struct afs_call_type afs_RXFSFetchData64 = { */ static void afs_fs_fetch_data64(struct afs_operation *op) { + struct netfs_io_subrequest *subreq = op->fetch.subreq; struct afs_vnode_param *vp = &op->file[0]; - struct afs_read *req = op->fetch.req; struct afs_call *call; __be32 *bp; @@ -432,16 +439,19 @@ static void afs_fs_fetch_data64(struct afs_operation *op) if (!call) return afs_op_nomem(op); + if (op->flags & AFS_OPERATION_ASYNC) + call->async = true; + /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSFETCHDATA64); bp[1] = htonl(vp->fid.vid); bp[2] = htonl(vp->fid.vnode); bp[3] = htonl(vp->fid.unique); - bp[4] = htonl(upper_32_bits(req->pos)); - bp[5] = htonl(lower_32_bits(req->pos)); + bp[4] = htonl(upper_32_bits(subreq->start + subreq->transferred)); + bp[5] = htonl(lower_32_bits(subreq->start + subreq->transferred)); bp[6] = 0; - bp[7] = htonl(lower_32_bits(req->len)); + bp[7] = htonl(lower_32_bits(subreq->len - subreq->transferred)); call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); @@ -453,9 +463,9 @@ static void afs_fs_fetch_data64(struct afs_operation *op) */ void afs_fs_fetch_data(struct afs_operation *op) { + struct netfs_io_subrequest *subreq = op->fetch.subreq; struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_read *req = op->fetch.req; __be32 *bp; if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) @@ -467,16 +477,14 @@ void afs_fs_fetch_data(struct afs_operation *op) if (!call) return afs_op_nomem(op); - req->call_debug_id = call->debug_id; - /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSFETCHDATA); bp[1] = htonl(vp->fid.vid); bp[2] = htonl(vp->fid.vnode); bp[3] = htonl(vp->fid.unique); - bp[4] = htonl(lower_32_bits(req->pos)); - bp[5] = htonl(lower_32_bits(req->len)); + bp[4] = htonl(lower_32_bits(subreq->start + subreq->transferred)); + bp[5] = htonl(lower_32_bits(subreq->len + subreq->transferred)); call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); @@ -1645,7 +1653,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server, bp = call->request; *bp++ = htonl(FSGIVEUPALLCALLBACKS); - call->server = afs_use_server(server, afs_server_trace_give_up_cb); + call->server = afs_use_server(server, false, afs_server_trace_use_give_up_cb); afs_make_call(call, GFP_NOFS); afs_wait_for_call_to_complete(call); ret = call->error; @@ -1728,6 +1736,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { .op = afs_FS_GetCapabilities, .deliver = afs_deliver_fs_get_capabilities, .done = afs_fileserver_probe_result, + .immediate_cancel = afs_fileserver_probe_result, .destructor = afs_fs_get_capabilities_destructor, }; @@ -1751,7 +1760,7 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, return false; call->key = key; - call->server = afs_use_server(server, afs_server_trace_get_caps); + call->server = afs_use_server(server, false, afs_server_trace_use_get_caps); call->peer = rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer); call->probe = afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps); call->probe_index = addr_index; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 94fc049aff58..e9538e91f848 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -25,8 +25,94 @@ #include "internal.h" #include "afs_fs.h" +void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op) +{ + size_t size = strlen(op->create.symlink) + 1; + size_t dsize = 0; + char *p; + + if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size, + mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0) + return; + + vnode->directory_size = dsize; + p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0); + memcpy(p, op->create.symlink, size); + kunmap_local(p); + set_bit(AFS_VNODE_DIR_READ, &vnode->flags); + netfs_single_mark_inode_dirty(&vnode->netfs.inode); +} + +static void afs_put_link(void *arg) +{ + struct folio *folio = virt_to_folio(arg); + + kunmap_local(arg); + folio_put(folio); +} + +const char *afs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + struct folio *folio; + char *content; + ssize_t ret; + + if (!dentry) { + /* RCU pathwalk. */ + if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode)) + return ERR_PTR(-ECHILD); + goto good; + } + + if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags)) + goto fetch; + + ret = afs_validate(vnode, NULL); + if (ret < 0) + return ERR_PTR(ret); + + if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && + test_bit(AFS_VNODE_DIR_READ, &vnode->flags)) + goto good; + +fetch: + ret = afs_read_single(vnode, NULL); + if (ret < 0) + return ERR_PTR(ret); + set_bit(AFS_VNODE_DIR_READ, &vnode->flags); + +good: + folio = folioq_folio(vnode->directory, 0); + folio_get(folio); + content = kmap_local_folio(folio, 0); + set_delayed_call(callback, afs_put_link, content); + return content; +} + +int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + DEFINE_DELAYED_CALL(done); + const char *content; + int len; + + content = afs_get_link(dentry, d_inode(dentry), &done); + if (IS_ERR(content)) { + do_delayed_call(&done); + return PTR_ERR(content); + } + + len = umin(strlen(content), buflen); + if (copy_to_user(buffer, content, len)) + len = -EFAULT; + do_delayed_call(&done); + return len; +} + static const struct inode_operations afs_symlink_inode_operations = { - .get_link = page_get_link, + .get_link = afs_get_link, + .readlink = afs_readlink, }; static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) @@ -110,7 +196,9 @@ static int afs_inode_init_from_status(struct afs_operation *op, inode->i_op = &afs_dir_inode_operations; inode->i_fop = &afs_dir_file_operations; inode->i_mapping->a_ops = &afs_dir_aops; - mapping_set_large_folios(inode->i_mapping); + __set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags); + /* Assume locally cached directory data will be valid. */ + __set_bit(AFS_VNODE_DIR_VALID, &vnode->flags); break; case AFS_FTYPE_SYMLINK: /* Symlinks with a mode of 0644 are actually mountpoints. */ @@ -122,13 +210,13 @@ static int afs_inode_init_from_status(struct afs_operation *op, inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; inode->i_fop = &afs_mntpt_file_operations; - inode->i_mapping->a_ops = &afs_symlink_aops; } else { inode->i_mode = S_IFLNK | status->mode; inode->i_op = &afs_symlink_inode_operations; - inode->i_mapping->a_ops = &afs_symlink_aops; } + inode->i_mapping->a_ops = &afs_dir_aops; inode_nohighmem(inode); + mapping_set_release_always(inode->i_mapping); break; default: dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL); @@ -140,15 +228,17 @@ static int afs_inode_init_from_status(struct afs_operation *op, afs_set_netfs_context(vnode); vnode->invalid_before = status->data_version; + trace_afs_set_dv(vnode, status->data_version); inode_set_iversion_raw(&vnode->netfs.inode, status->data_version); if (!vp->scb.have_cb) { /* it's a symlink we just created (the fileserver * didn't give us a callback) */ - atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); + afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink); } else { vnode->cb_server = op->server; - atomic64_set(&vnode->cb_expires_at, vp->scb.callback.expires_at); + afs_set_cb_promise(vnode, vp->scb.callback.expires_at, + afs_cb_promise_set_new_inode); } write_sequnlock(&vnode->cb_lock); @@ -207,12 +297,17 @@ static void afs_apply_status(struct afs_operation *op, if (vp->update_ctime) inode_set_ctime_to_ts(inode, op->ctime); - if (vnode->status.data_version != status->data_version) + if (vnode->status.data_version != status->data_version) { + trace_afs_set_dv(vnode, status->data_version); data_changed = true; + } vnode->status = *status; if (vp->dv_before + vp->dv_delta != status->data_version) { + trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta, + status->data_version); + if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) && atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE) pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n", @@ -223,12 +318,10 @@ static void afs_apply_status(struct afs_operation *op, op->debug_id); vnode->invalid_before = status->data_version; - if (vnode->status.type == AFS_FTYPE_DIR) { - if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) - afs_stat_v(vnode, n_inval); - } else { + if (vnode->status.type == AFS_FTYPE_DIR) + afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch); + else set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); - } change_size = true; data_changed = true; unexpected_jump = true; @@ -258,6 +351,8 @@ static void afs_apply_status(struct afs_operation *op, inode_set_ctime_to_ts(inode, t); inode_set_atime_to_ts(inode, t); } + if (op->ops == &afs_fetch_data_operation) + op->fetch.subreq->rreq->i_size = status->size; } } @@ -273,7 +368,7 @@ static void afs_apply_callback(struct afs_operation *op, if (!afs_cb_is_broken(vp->cb_break_before, vnode)) { if (op->volume->type == AFSVL_RWVOL) vnode->cb_server = op->server; - atomic64_set(&vnode->cb_expires_at, cb->expires_at); + afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb); } } @@ -435,7 +530,9 @@ static void afs_get_inode_cache(struct afs_vnode *vnode) } __packed key; struct afs_vnode_cache_aux aux; - if (vnode->status.type != AFS_FTYPE_FILE) { + if (vnode->status.type != AFS_FTYPE_FILE && + vnode->status.type != AFS_FTYPE_DIR && + vnode->status.type != AFS_FTYPE_SYMLINK) { vnode->netfs.cache = NULL; return; } @@ -512,7 +609,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque) struct afs_vnode *vnode = AFS_FS_I(inode); vnode->volume = as->volume; - vnode->fid.vid = as->volume->vid, + vnode->fid.vid = as->volume->vid; vnode->fid.vnode = 1; vnode->fid.unique = 1; inode->i_ino = 1; @@ -545,7 +642,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); - vnode->cb_v_check = atomic_read(&as->volume->cb_v_break), + vnode->cb_v_check = atomic_read(&as->volume->cb_v_break); afs_set_netfs_context(vnode); op = afs_alloc_operation(key, as->volume); @@ -637,6 +734,7 @@ int afs_drop_inode(struct inode *inode) void afs_evict_inode(struct inode *inode) { struct afs_vnode_cache_aux aux; + struct afs_super_info *sbi = AFS_FS_S(inode->i_sb); struct afs_vnode *vnode = AFS_FS_I(inode); _enter("{%llx:%llu.%d}", @@ -648,7 +746,22 @@ void afs_evict_inode(struct inode *inode) ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); + if ((S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) && + (inode->i_state & I_DIRTY) && + !sbi->dyn_root) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .for_sync = true, + .range_end = LLONG_MAX, + }; + + afs_single_writepages(inode->i_mapping, &wbc); + } + + netfs_wait_for_outstanding_io(inode); truncate_inode_pages_final(&inode->i_data); + netfs_free_folioq_buffer(vnode->directory); afs_set_cache_aux(vnode, &aux); netfs_clear_inode_writeback(inode, &aux); @@ -694,13 +807,18 @@ static void afs_setattr_edit_file(struct afs_operation *op) { struct afs_vnode_param *vp = &op->file[0]; struct afs_vnode *vnode = vp->vnode; + struct inode *inode = &vnode->netfs.inode; if (op->setattr.attr->ia_valid & ATTR_SIZE) { loff_t size = op->setattr.attr->ia_size; - loff_t i_size = op->setattr.old_i_size; + loff_t old = op->setattr.old_i_size; + + /* Note: inode->i_size was updated by afs_apply_status() inside + * the I/O and callback locks. + */ - if (size != i_size) { - truncate_setsize(&vnode->netfs.inode, size); + if (size != old) { + truncate_pagecache(inode, size); netfs_resize_file(&vnode->netfs, size, true); fscache_resize_cookie(afs_vnode_cache(vnode), size); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6ce5a612937c..1124ea4000cb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -20,6 +20,7 @@ #include <linux/uuid.h> #include <linux/mm_types.h> #include <linux/dns_resolver.h> +#include <crypto/krb5.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> @@ -130,6 +131,7 @@ struct afs_call { wait_queue_head_t waitq; /* processes awaiting completion */ struct work_struct async_work; /* async I/O processor */ struct work_struct work; /* actual work processor */ + struct work_struct free_work; /* Deferred free processor */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* security for this call */ @@ -162,6 +164,7 @@ struct afs_call { spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ + unsigned long long remaining; /* How much is left to receive */ unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ @@ -174,8 +177,10 @@ struct afs_call { bool intr; /* T if interruptible */ bool unmarshalling_error; /* T if an unmarshalling error occurred */ bool responded; /* Got a response from the call (may be abort) */ + u8 security_ix; /* Security class */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ + u32 enctype; /* Security encoding type */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ union { /* place to extract temporary data */ @@ -200,11 +205,17 @@ struct afs_call_type { /* clean up a call */ void (*destructor)(struct afs_call *call); + /* Async receive processing function */ + void (*async_rx)(struct work_struct *work); + /* Work function */ void (*work)(struct work_struct *work); /* Call done function (gets called immediately on success or failure) */ void (*done)(struct afs_call *call); + + /* Handle a call being immediately cancelled. */ + void (*immediate_cancel)(struct afs_call *call); }; /* @@ -232,28 +243,6 @@ static inline struct key *afs_file_key(struct file *file) } /* - * Record of an outstanding read operation on a vnode. - */ -struct afs_read { - loff_t pos; /* Where to start reading */ - loff_t len; /* How much we're asking for */ - loff_t actual_len; /* How much we're actually getting */ - loff_t file_size; /* File size returned by server */ - struct key *key; /* The key to use to reissue the read */ - struct afs_vnode *vnode; /* The file being read into. */ - struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */ - afs_dataversion_t data_version; /* Version number returned by server */ - refcount_t usage; - unsigned int call_debug_id; - unsigned int nr_pages; - int error; - void (*done)(struct afs_read *); - void (*cleanup)(struct afs_read *); - struct iov_iter *iter; /* Iterator representing the buffer */ - struct iov_iter def_iter; /* Default iterator */ -}; - -/* * AFS superblock private data * - there's one superblock per volume */ @@ -295,15 +284,15 @@ struct afs_net { struct socket *socket; struct afs_call *spare_incoming_call; struct work_struct charge_preallocation_work; + struct work_struct rx_oob_work; struct mutex socket_mutex; atomic_t nr_outstanding_calls; atomic_t nr_superblocks; /* Cell database */ struct rb_root cells; - struct afs_cell *ws_cell; - struct work_struct cells_manager; - struct timer_list cells_timer; + struct idr cells_dyn_ino; /* cell->dynroot_ino mapping */ + struct afs_cell __rcu *ws_cell; atomic_t cells_outstanding; struct rw_semaphore cells_lock; struct mutex cells_alias_lock; @@ -315,18 +304,12 @@ struct afs_net { * cell, but in practice, people create aliases and subsets and there's * no easy way to distinguish them. */ - seqlock_t fs_lock; /* For fs_servers, fs_probe_*, fs_proc */ - struct rb_root fs_servers; /* afs_server (by server UUID or address) */ + seqlock_t fs_lock; /* For fs_probe_*, fs_proc */ struct list_head fs_probe_fast; /* List of afs_server to probe at 30s intervals */ struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */ struct hlist_head fs_proc; /* procfs servers list */ - struct hlist_head fs_addresses; /* afs_server (by lowest IPv6 addr) */ - seqlock_t fs_addr_lock; /* For fs_addresses[46] */ - - struct work_struct fs_manager; - struct timer_list fs_timer; - + struct key *fs_cm_token_key; /* Key for creating CM tokens */ struct work_struct fs_prober; struct timer_list fs_probe_timer; atomic_t servers_outstanding; @@ -359,13 +342,10 @@ struct afs_net { extern const char afs_init_sysname[]; enum afs_cell_state { - AFS_CELL_UNSET, - AFS_CELL_ACTIVATING, + AFS_CELL_SETTING_UP, AFS_CELL_ACTIVE, - AFS_CELL_DEACTIVATING, - AFS_CELL_INACTIVE, - AFS_CELL_FAILED, - AFS_CELL_REMOVED, + AFS_CELL_REMOVING, + AFS_CELL_DEAD, }; /* @@ -396,7 +376,9 @@ struct afs_cell { struct afs_cell *alias_of; /* The cell this is an alias of */ struct afs_volume *root_volume; /* The root.cell volume if there is one */ struct key *anonymous_key; /* anonymous user key for this cell */ + struct work_struct destroyer; /* Destroyer for cell */ struct work_struct manager; /* Manager for init/deinit/dns */ + struct timer_list management_timer; /* General management timer */ struct hlist_node proc_link; /* /proc cell list link */ time64_t dns_expiry; /* Time AFSDB/SRV record expires */ time64_t last_inactive; /* Time of last drop of usage count */ @@ -412,6 +394,7 @@ struct afs_cell { enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */ unsigned int dns_lookup_count; /* Counter of DNS lookups */ unsigned int debug_id; + unsigned int dynroot_ino; /* Inode numbers for dynroot (a pair) */ /* The volumes belonging to this cell */ struct rw_semaphore vs_lock; /* Lock for server->volumes */ @@ -421,7 +404,7 @@ struct afs_cell { /* Active fileserver interaction state. */ struct rb_root fs_servers; /* afs_server (by server UUID) */ - seqlock_t fs_lock; /* For fs_servers */ + struct rw_semaphore fs_lock; /* For fs_servers */ /* VL server list. */ rwlock_t vl_servers_lock; /* Lock on vl_servers */ @@ -556,22 +539,24 @@ struct afs_server { }; struct afs_cell *cell; /* Cell to which belongs (pins ref) */ - struct rb_node uuid_rb; /* Link in net->fs_servers */ - struct afs_server __rcu *uuid_next; /* Next server with same UUID */ - struct afs_server *uuid_prev; /* Previous server with same UUID */ - struct list_head probe_link; /* Link in net->fs_probe_list */ - struct hlist_node addr_link; /* Link in net->fs_addresses6 */ + struct rb_node uuid_rb; /* Link in cell->fs_servers */ + struct list_head probe_link; /* Link in net->fs_probe_* */ struct hlist_node proc_link; /* Link in net->fs_proc */ struct list_head volumes; /* RCU list of afs_server_entry objects */ - struct afs_server *gc_next; /* Next server in manager's list */ + struct work_struct destroyer; /* Work item to try and destroy a server */ + struct timer_list timer; /* Management timer */ + struct mutex cm_token_lock; /* Lock governing creation of appdata */ + struct krb5_buffer cm_rxgk_appdata; /* Appdata to be included in RESPONSE packet */ time64_t unuse_time; /* Time at which last unused */ unsigned long flags; #define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */ #define AFS_SERVER_FL_UPDATING 1 #define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */ -#define AFS_SERVER_FL_NOT_READY 4 /* The record is not ready for use */ -#define AFS_SERVER_FL_NOT_FOUND 5 /* VL server says no such server */ -#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */ +#define AFS_SERVER_FL_UNCREATED 3 /* The record needs creating */ +#define AFS_SERVER_FL_CREATING 4 /* The record is being created */ +#define AFS_SERVER_FL_EXPIRED 5 /* The record has expired */ +#define AFS_SERVER_FL_NOT_FOUND 6 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 7 /* Failed to access VL server */ #define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ #define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */ #define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */ @@ -581,6 +566,7 @@ struct afs_server { atomic_t active; /* Active user count */ u32 addr_version; /* Address list version */ u16 service_id; /* Service ID we're using. */ + short create_error; /* Creation error */ unsigned int rtt; /* Server's current RTT in uS */ unsigned int debug_id; /* Debugging ID for traces */ @@ -635,6 +621,7 @@ struct afs_volume { afs_volid_t vid; /* The volume ID of this volume */ afs_volid_t vids[AFS_MAXTYPES]; /* All associated volume IDs */ refcount_t ref; + unsigned int debug_id; /* Debugging ID for traces */ time64_t update_at; /* Time at which to next update */ struct afs_cell *cell; /* Cell to which belongs (pins ref) */ struct rb_node cell_node; /* Link in cell->volumes */ @@ -701,24 +688,26 @@ struct afs_vnode { struct afs_file_status status; /* AFS status info for this file */ afs_dataversion_t invalid_before; /* Child dentries are invalid before this */ struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ - struct mutex io_lock; /* Lock for serialising I/O on this mutex */ + struct list_head io_lock_waiters; /* Threads waiting for the I/O lock */ struct rw_semaphore validate_lock; /* lock for validating this vnode */ struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */ struct key *silly_key; /* Silly rename key */ spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; +#define AFS_VNODE_IO_LOCK 0 /* Set if the I/O serialisation lock is held */ #define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ #define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */ #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ -#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ #define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ #define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */ #define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */ +#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */ + struct folio_queue *directory; /* Directory contents */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ struct list_head granted_locks; /* locks granted on this file */ @@ -727,6 +716,7 @@ struct afs_vnode { ktime_t locked_at; /* Time at which lock obtained */ enum afs_lock_state lock_state : 8; afs_lock_type_t lock_type : 8; + unsigned int directory_size; /* Amount of space in ->directory */ /* outstanding callback notification on this file */ struct work_struct cb_work; /* Work for mmap'd files */ @@ -906,7 +896,7 @@ struct afs_operation { bool new_negative; } rename; struct { - struct afs_read *req; + struct netfs_io_subrequest *subreq; } fetch; struct { afs_lock_type_t type; @@ -916,7 +906,6 @@ struct afs_operation { loff_t pos; loff_t size; loff_t i_size; - bool laundering; /* Laundering page, PG_writeback not set */ } store; struct { struct iattr *attr; @@ -959,6 +948,7 @@ struct afs_operation { #define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */ #define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */ #define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */ +#define AFS_OPERATION_ASYNC 0x2000 /* Set if should run asynchronously */ }; /* @@ -983,6 +973,21 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl i_size_read(&vnode->netfs.inode), flags); } +/* + * Directory iteration management. + */ +struct afs_dir_iter { + struct afs_vnode *dvnode; + union afs_xdr_dir_block *block; + struct folio_queue *fq; + unsigned int fpos; + int fq_slot; + unsigned int loop_check; + u8 nr_slots; + u8 bucket; + unsigned int prev_entry; +}; + #include <trace/events/afs.h> /*****************************************************************************/ @@ -1003,6 +1008,9 @@ extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr, __be32 xdr, u16 port); extern int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *addr, __be32 *xdr, u16 port); +void afs_set_peer_appdata(struct afs_server *server, + struct afs_addr_list *old_alist, + struct afs_addr_list *new_alist); /* * addr_prefs.c @@ -1039,16 +1047,17 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, extern int afs_cell_init(struct afs_net *, const char *); extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned, enum afs_cell_trace); -extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, - const char *, bool); +struct afs_cell *afs_lookup_cell(struct afs_net *net, + const char *name, unsigned int namesz, + const char *vllist, bool excl, + enum afs_cell_trace trace); extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace); -extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace); +void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason); extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace); extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace); extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace); extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace); -extern void afs_manage_cells(struct work_struct *); -extern void afs_cells_timer(struct timer_list *); +void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs); extern void __net_exit afs_cell_purge(struct afs_net *); /* @@ -1057,6 +1066,19 @@ extern void __net_exit afs_cell_purge(struct afs_net *); extern bool afs_cm_incoming_call(struct afs_call *); /* + * cm_security.c + */ +void afs_process_oob_queue(struct work_struct *work); +#ifdef CONFIG_RXGK +int afs_create_token_key(struct afs_net *net, struct socket *socket); +#else +static inline int afs_create_token_key(struct afs_net *net, struct socket *socket) +{ + return 0; +} +#endif + +/* * dir.c */ extern const struct file_operations afs_dir_file_operations; @@ -1064,8 +1086,13 @@ extern const struct inode_operations afs_dir_inode_operations; extern const struct address_space_operations afs_dir_aops; extern const struct dentry_operations afs_fs_dentry_operations; +ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file); +ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file) + __acquires(&dvnode->validate_lock); extern void afs_d_release(struct dentry *); extern void afs_check_for_remote_deletion(struct afs_operation *); +int afs_single_writepages(struct address_space *mapping, + struct writeback_control *wbc); /* * dir_edit.c @@ -1073,6 +1100,20 @@ extern void afs_check_for_remote_deletion(struct afs_operation *); extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *, enum afs_edit_dir_reason); extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason); +void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode, + enum afs_edit_dir_reason why); +void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode); + +/* + * dir_search.c + */ +unsigned int afs_dir_hash_name(const struct qstr *name); +bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name); +union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block); +int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name, + struct afs_fid *_fid); +int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name, + struct afs_fid *_fid, afs_dataversion_t *_dir_version); /* * dir_silly.c @@ -1087,34 +1128,23 @@ extern int afs_silly_iput(struct dentry *, struct inode *); extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_dynroot_dentry_operations; -extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *); -extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *); -extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *); -extern int afs_dynroot_populate(struct super_block *); -extern void afs_dynroot_depopulate(struct super_block *); +struct inode *afs_dynroot_iget_root(struct super_block *sb); /* * file.c */ extern const struct address_space_operations afs_file_aops; -extern const struct address_space_operations afs_symlink_aops; extern const struct inode_operations afs_file_inode_operations; extern const struct file_operations afs_file_operations; +extern const struct afs_operation_ops afs_fetch_data_operation; extern const struct netfs_request_ops afs_req_ops; extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *); extern void afs_put_wb_key(struct afs_wb_key *); extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); -extern int afs_fetch_data(struct afs_vnode *, struct afs_read *); -extern struct afs_read *afs_alloc_read(gfp_t); -extern void afs_put_read(struct afs_read *); - -static inline struct afs_read *afs_get_read(struct afs_read *req) -{ - refcount_inc(&req->usage); - return req; -} +void afs_fetch_data_async_rx(struct work_struct *work); +void afs_fetch_data_immediate_cancel(struct afs_call *call); /* * flock.c @@ -1166,6 +1196,7 @@ extern void afs_fs_store_acl(struct afs_operation *); extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *); extern int afs_put_operation(struct afs_operation *); extern bool afs_begin_vnode_operation(struct afs_operation *); +extern void afs_end_vnode_operation(struct afs_operation *op); extern void afs_wait_for_operation(struct afs_operation *); extern int afs_do_sync_operation(struct afs_operation *); @@ -1189,8 +1220,8 @@ struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *est enum afs_estate_trace where); void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where); extern void afs_fileserver_probe_result(struct afs_call *); -void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, - struct afs_addr_list *new_addrs, struct key *key); +int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, + struct afs_addr_list *new_alist, struct key *key); int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr); extern void afs_probe_fileserver(struct afs_net *, struct afs_server *); extern void afs_fs_probe_dispatcher(struct work_struct *); @@ -1203,10 +1234,13 @@ extern void afs_fs_probe_cleanup(struct afs_net *); */ extern const struct afs_operation_ops afs_fetch_status_operation; +void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op); +const char *afs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback); +int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen); extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *); extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); extern int afs_ilookup5_test_by_fid(struct inode *, void *); -extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *); extern struct inode *afs_root_iget(struct super_block *, struct key *); extern int afs_getattr(struct mnt_idmap *idmap, const struct path *, @@ -1332,7 +1366,9 @@ extern int __net_init afs_open_socket(struct afs_net *); extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); +void afs_deferred_put_call(struct afs_call *call); void afs_make_call(struct afs_call *call, gfp_t gfp); +void afs_deliver_to_call(struct afs_call *call); void afs_wait_for_call_to_complete(struct afs_call *call); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, @@ -1343,6 +1379,28 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, bool); extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause); +static inline struct afs_call *afs_get_call(struct afs_call *call, + enum afs_call_trace why) +{ + int r; + + __refcount_inc(&call->ref, &r); + + trace_afs_call(call->debug_id, why, r + 1, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); + return call; +} + +static inline void afs_see_call(struct afs_call *call, enum afs_call_trace why) +{ + int r = refcount_read(&call->ref); + + trace_afs_call(call->debug_id, why, r, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); +} + static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call, gfp_t gfp) { @@ -1464,20 +1522,30 @@ extern void __exit afs_clean_up_permit_cache(void); */ extern spinlock_t afs_server_peer_lock; -extern struct afs_server *afs_find_server(struct afs_net *, const struct rxrpc_peer *); -extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); +struct afs_server *afs_find_server(const struct rxrpc_peer *peer); extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32); extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); -extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace); -extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace); -extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace); +struct afs_server *afs_use_server(struct afs_server *server, bool activate, + enum afs_server_trace reason); +void afs_unuse_server(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason); +void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason); extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace); -extern void afs_manage_servers(struct work_struct *); -extern void afs_servers_timer(struct timer_list *); +void afs_purge_servers(struct afs_cell *cell); extern void afs_fs_probe_timer(struct timer_list *); -extern void __net_exit afs_purge_servers(struct afs_net *); +void __net_exit afs_wait_for_servers(struct afs_net *net); bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key); +static inline void afs_see_server(struct afs_server *server, enum afs_server_trace trace) +{ + int r = refcount_read(&server->ref); + int a = atomic_read(&server->active); + + trace_afs_server(server->debug_id, r, a, trace); + +} + static inline void afs_inc_servers_outstanding(struct afs_net *net) { atomic_inc(&net->servers_outstanding); @@ -1599,11 +1667,14 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *); /* * write.c */ +void afs_prepare_write(struct netfs_io_subrequest *subreq); +void afs_issue_write(struct netfs_io_subrequest *subreq); +void afs_begin_writeback(struct netfs_io_request *wreq); +void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream); extern int afs_writepages(struct address_space *, struct writeback_control *); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); extern void afs_prune_wb_keys(struct afs_vnode *); -void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len); /* * xattr.c @@ -1706,6 +1777,38 @@ static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where) return -EIO; } +/* + * Set the callback promise on a vnode. + */ +static inline void afs_set_cb_promise(struct afs_vnode *vnode, time64_t expires_at, + enum afs_cb_promise_trace trace) +{ + atomic64_set(&vnode->cb_expires_at, expires_at); + trace_afs_cb_promise(vnode, trace); +} + +/* + * Clear the callback promise on a vnode, returning true if it was promised. + */ +static inline bool afs_clear_cb_promise(struct afs_vnode *vnode, + enum afs_cb_promise_trace trace) +{ + trace_afs_cb_promise(vnode, trace); + return atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE; +} + +/* + * Mark a directory as being invalid. + */ +static inline void afs_invalidate_dir(struct afs_vnode *dvnode, + enum afs_dir_invalid_trace trace) +{ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + trace_afs_dir_invalid(dvnode, trace); + afs_stat_v(dvnode, n_inval); + } +} + /*****************************************************************************/ /* * debug tracing diff --git a/fs/afs/main.c b/fs/afs/main.c index a14f6013e316..02475d415d88 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -73,28 +73,21 @@ static int __net_init afs_net_init(struct net *net_ns) generate_random_uuid((unsigned char *)&net->uuid); INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); + INIT_WORK(&net->rx_oob_work, afs_process_oob_queue); mutex_init(&net->socket_mutex); net->cells = RB_ROOT; + idr_init(&net->cells_dyn_ino); init_rwsem(&net->cells_lock); - INIT_WORK(&net->cells_manager, afs_manage_cells); - timer_setup(&net->cells_timer, afs_cells_timer, 0); - mutex_init(&net->cells_alias_lock); mutex_init(&net->proc_cells_lock); INIT_HLIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); - net->fs_servers = RB_ROOT; INIT_LIST_HEAD(&net->fs_probe_fast); INIT_LIST_HEAD(&net->fs_probe_slow); INIT_HLIST_HEAD(&net->fs_proc); - INIT_HLIST_HEAD(&net->fs_addresses); - seqlock_init(&net->fs_addr_lock); - - INIT_WORK(&net->fs_manager, afs_manage_servers); - timer_setup(&net->fs_timer, afs_servers_timer, 0); INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher); timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0); atomic_set(&net->servers_outstanding, 1); @@ -130,13 +123,14 @@ error_open_socket: net->live = false; afs_fs_probe_cleanup(net); afs_cell_purge(net); - afs_purge_servers(net); + afs_wait_for_servers(net); error_cell_init: net->live = false; afs_proc_cleanup(net); error_proc: afs_put_sysnames(net->sysnames); error_sysnames: + idr_destroy(&net->cells_dyn_ino); net->live = false; return ret; } @@ -151,10 +145,11 @@ static void __net_exit afs_net_exit(struct net *net_ns) net->live = false; afs_fs_probe_cleanup(net); afs_cell_purge(net); - afs_purge_servers(net); + afs_wait_for_servers(net); afs_close_socket(net); afs_proc_cleanup(net); afs_put_sysnames(net->sysnames); + idr_destroy(&net->cells_dyn_ino); kfree_rcu(rcu_access_pointer(net->address_prefs), rcu); } @@ -177,7 +172,7 @@ static int __init afs_init(void) afs_wq = alloc_workqueue("afs", 0, 0); if (!afs_wq) goto error_afs_wq; - afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); + afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!afs_async_calls) goto error_async; afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index b8180bf2281f..8f2b3a177690 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> +#include <crypto/krb5.h> #include "internal.h" #include "afs_fs.h" #include "protocol_uae.h" @@ -103,6 +104,32 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGK_INCONSISTENCY: return -EPROTO; + case RXGK_PACKETSHORT: return -EPROTO; + case RXGK_BADCHALLENGE: return -EPROTO; + case RXGK_SEALEDINCON: return -EKEYREJECTED; + case RXGK_NOTAUTH: return -EKEYREJECTED; + case RXGK_EXPIRED: return -EKEYEXPIRED; + case RXGK_BADLEVEL: return -EKEYREJECTED; + case RXGK_BADKEYNO: return -EKEYREJECTED; + case RXGK_NOTRXGK: return -EKEYREJECTED; + case RXGK_UNSUPPORTED: return -EKEYREJECTED; + case RXGK_GSSERROR: return -EKEYREJECTED; +#ifdef RXGK_BADETYPE + case RXGK_BADETYPE: return -ENOPKG; +#endif +#ifdef RXGK_BADTOKEN + case RXGK_BADTOKEN: return -EKEYREJECTED; +#endif +#ifdef RXGK_BADETYPE + case RXGK_DATALEN: return -EPROTO; +#endif +#ifdef RXGK_BADQOP + case RXGK_BADQOP: return -EKEYREJECTED; +#endif + + case KRB5_PROG_KEYTYPE_NOSUPP: return -ENOPKG; + case RXGEN_OPCODE: return -ENOTSUPP; default: return -EREMOTEIO; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 97f50e9fd9eb..9434a5399f2b 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -30,7 +30,7 @@ const struct file_operations afs_mntpt_file_operations = { const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, - .readlink = page_readlink, + .readlink = afs_readlink, .getattr = afs_getattr, }; @@ -87,7 +87,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) ctx->force = true; } if (ctx->cell) { - afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt); + afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_mntpt); ctx->cell = NULL; } if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { @@ -107,7 +107,8 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) if (size > AFS_MAXCELLNAME) return -ENAMETOOLONG; - cell = afs_lookup_cell(ctx->net, p, size, NULL, false); + cell = afs_lookup_cell(ctx->net, p, size, NULL, false, + afs_cell_trace_use_lookup_mntpt); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt); return PTR_ERR(cell); @@ -118,9 +119,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) ctx->volnamesz = sizeof(afs_root_volume) - 1; } else { /* read the contents of the AFS special symlink */ - struct page *page; + DEFINE_DELAYED_CALL(cleanup); + const char *content; loff_t size = i_size_read(d_inode(mntpt)); - char *buf; if (src_as->cell) ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt); @@ -128,18 +129,23 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) if (size < 2 || size > PAGE_SIZE - 1) return -EINVAL; - page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); - if (IS_ERR(page)) - return PTR_ERR(page); + content = afs_get_link(mntpt, d_inode(mntpt), &cleanup); + if (IS_ERR(content)) { + do_delayed_call(&cleanup); + return PTR_ERR(content); + } - buf = kmap(page); ret = -EINVAL; - if (buf[size - 1] == '.') - ret = vfs_parse_fs_string(fc, "source", buf, size - 1); - kunmap(page); - put_page(page); + if (content[size - 1] == '.') + ret = vfs_parse_fs_string(fc, "source", content, size - 1); + do_delayed_call(&cleanup); if (ret < 0) return ret; + + /* Don't cross a backup volume mountpoint from a backup volume */ + if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL && + ctx->type == AFSVL_BACKVOL) + return -ENODEV; } return 0; @@ -183,7 +189,6 @@ struct vfsmount *afs_d_automount(struct path *path) if (IS_ERR(newmnt)) return newmnt; - mntget(newmnt); /* prevent immediate expiration */ mnt_set_expiry(newmnt, &afs_vfsmounts); queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 15eab053af6d..40e879c8ca77 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -122,14 +122,15 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) if (strcmp(buf, "add") == 0) { struct afs_cell *cell; - cell = afs_lookup_cell(net, name, strlen(name), args, true); + cell = afs_lookup_cell(net, name, strlen(name), args, true, + afs_cell_trace_use_lookup_add); if (IS_ERR(cell)) { ret = PTR_ERR(cell); goto done; } if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin); + afs_unuse_cell(cell, afs_cell_trace_unuse_no_pin); } else { goto inval; } @@ -206,7 +207,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v) net = afs_seq2net_single(m); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (cell) seq_printf(m, "%s\n", cell->name); up_read(&net->cells_lock); @@ -240,7 +241,13 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) /* determine command to perform */ _debug("rootcell=%s", buf); - ret = afs_cell_init(net, buf); + ret = -EEXIST; + inode_lock(file_inode(file)); + if (!rcu_access_pointer(net->ws_cell)) + ret = afs_cell_init(net, buf); + else + printk("busy\n"); + inode_unlock(file_inode(file)); out: _leave(" = %d", ret); @@ -437,8 +444,6 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) } server = list_entry(v, struct afs_server, proc_link); - estate = rcu_dereference(server->endpoint_state); - alist = estate->addresses; seq_printf(m, "%pU %3d %3d %s\n", &server->uuid, refcount_read(&server->ref), @@ -448,10 +453,16 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) server->flags, server->rtt); seq_printf(m, " - probe: last=%d\n", (int)(jiffies - server->probed_at) / HZ); + + estate = rcu_dereference(server->endpoint_state); + if (!estate) + goto out; failed = estate->failed_set; seq_printf(m, " - ESTATE pq=%x np=%u rsp=%lx f=%lx\n", estate->probe_seq, atomic_read(&estate->nr_probing), estate->responsive_set, estate->failed_set); + + alist = estate->addresses; seq_printf(m, " - ALIST v=%u ap=%u\n", alist->version, alist->addr_pref_version); for (i = 0; i < alist->nr_addrs; i++) { @@ -464,6 +475,8 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) rxrpc_kernel_get_srtt(addr->peer), addr->last_error, addr->prio); } + +out: return 0; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ed04bd1eeae8..a1c24f589d9e 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -99,7 +99,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op, write_seqlock(&vnode->cb_lock); ASSERTCMP(cb_server, ==, vnode->cb_server); vnode->cb_server = NULL; - if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) + if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server)) vnode->cb_break++; write_sequnlock(&vnode->cb_lock); } @@ -541,11 +541,13 @@ pick_server: test_bit(AFS_SE_EXCLUDED, &se->flags) || !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) continue; - es = op->server_states->endpoint_state; + es = op->server_states[i].endpoint_state; sal = es->addresses; afs_get_address_preferences_rcu(op->net, sal); for (j = 0; j < sal->nr_addrs; j++) { + if (es->failed_set & (1 << j)) + continue; if (!sal->addrs[j].peer) continue; if (sal->addrs[j].prio > best_prio) { @@ -581,7 +583,7 @@ selected_server: if (vnode->cb_server != server) { vnode->cb_server = server; vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break); - atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); + afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change); } retry_server: @@ -605,6 +607,8 @@ iterate_address: best_prio = -1; addr_index = 0; for (i = 0; i < alist->nr_addrs; i++) { + if (!(set & (1 << i))) + continue; if (alist->addrs[i].prio > best_prio) { addr_index = i; best_prio = alist->addrs[i].prio; @@ -628,8 +632,10 @@ iterate_address: wait_for_more_probe_results: error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); - if (!error) + if (error == 1) goto iterate_address; + if (!error) + goto restart_from_beginning; /* We've now had a failure to respond on all of a server's addresses - * immediately probe them again and consider retrying the server. @@ -640,10 +646,13 @@ wait_for_more_probe_results: error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { - case 0: + case 1: op->flags &= ~AFS_OPERATION_RETRY_SERVER; - trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + trace_afs_rotate(op, afs_rotate_trace_retry_server, 1); goto retry_server; + case 0: + trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + goto restart_from_beginning; case -ERESTARTSYS: afs_op_set_error(op, error); goto failed; @@ -674,7 +683,7 @@ no_more_servers: for (i = 0; i < op->server_list->nr_servers; i++) { struct afs_endpoint_state *estate; - estate = op->server_states->endpoint_state; + estate = op->server_states[i].endpoint_state; error = READ_ONCE(estate->error); if (error < 0) afs_op_accumulate_error(op, error, estate->abort_code); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c453428f3c8b..c1cadf8fb346 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,13 +18,23 @@ struct workqueue_struct *afs_async_calls; +static void afs_deferred_free_worker(struct work_struct *work); static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID); +static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob); static int afs_deliver_cm_op_id(struct afs_call *); +static const struct rxrpc_kernel_ops afs_rxrpc_callback_ops = { + .notify_new_call = afs_rx_new_call, + .discard_new_call = afs_rx_discard_new_call, + .user_attach_call = afs_rx_attach, + .notify_oob = afs_rx_notify_oob, +}; + /* asynchronous incoming call initial processing */ static const struct afs_call_type afs_RXCMxxxx = { .name = "CB.xxxx", @@ -48,6 +58,7 @@ int afs_open_socket(struct afs_net *net) goto error_1; socket->sk->sk_allocation = GFP_NOFS; + socket->sk->sk_user_data = net; /* bind the callback manager's address to make this a server socket */ memset(&srx, 0, sizeof(srx)); @@ -63,6 +74,14 @@ int afs_open_socket(struct afs_net *net) if (ret < 0) goto error_2; + ret = rxrpc_sock_set_manage_response(socket->sk, true); + if (ret < 0) + goto error_2; + + ret = afs_create_token_key(net, socket); + if (ret < 0) + pr_err("Couldn't create RxGK CM key: %d\n", ret); + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; @@ -83,8 +102,7 @@ int afs_open_socket(struct afs_net *net) * it sends back to us. */ - rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, - afs_rx_discard_new_call); + rxrpc_kernel_set_notifications(socket, &afs_rxrpc_callback_ops); ret = kernel_listen(socket, INT_MAX); if (ret < 0) @@ -124,7 +142,9 @@ void afs_close_socket(struct afs_net *net) kernel_sock_shutdown(net->socket, SHUT_RDWR); flush_workqueue(afs_async_calls); + net->socket->sk->sk_user_data = NULL; sock_release(net->socket); + key_put(net->fs_cm_token_key); _debug("dework"); _leave(""); @@ -148,7 +168,9 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, call->net = net; call->debug_id = atomic_inc_return(&rxrpc_debug_id); refcount_set(&call->ref, 1); - INIT_WORK(&call->async_work, afs_process_async_call); + INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call); + INIT_WORK(&call->work, call->type->work); + INIT_WORK(&call->free_work, afs_deferred_free_worker); init_waitqueue_head(&call->waitq); spin_lock_init(&call->state_lock); call->iter = &call->def_iter; @@ -159,6 +181,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, return call; } +static void afs_free_call(struct afs_call *call) +{ + struct afs_net *net = call->net; + int o; + + ASSERT(!work_pending(&call->async_work)); + + rxrpc_kernel_put_peer(call->peer); + + if (call->rxcall) { + rxrpc_kernel_shutdown_call(net->socket, call->rxcall); + rxrpc_kernel_put_call(net->socket, call->rxcall); + call->rxcall = NULL; + } + if (call->type->destructor) + call->type->destructor(call); + + afs_unuse_server_notime(call->net, call->server, afs_server_trace_unuse_call); + kfree(call->request); + + o = atomic_read(&net->nr_outstanding_calls); + trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, + __builtin_return_address(0)); + kfree(call); + + o = atomic_dec_return(&net->nr_outstanding_calls); + if (o == 0) + wake_up_var(&net->nr_outstanding_calls); +} + /* * Dispose of a reference on a call. */ @@ -173,45 +225,34 @@ void afs_put_call(struct afs_call *call) o = atomic_read(&net->nr_outstanding_calls); trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, __builtin_return_address(0)); + if (zero) + afs_free_call(call); +} - if (zero) { - ASSERT(!work_pending(&call->async_work)); - ASSERT(call->type->name != NULL); - - rxrpc_kernel_put_peer(call->peer); - - if (call->rxcall) { - rxrpc_kernel_shutdown_call(net->socket, call->rxcall); - rxrpc_kernel_put_call(net->socket, call->rxcall); - call->rxcall = NULL; - } - if (call->type->destructor) - call->type->destructor(call); - - afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); - kfree(call->request); - - trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, - __builtin_return_address(0)); - kfree(call); +static void afs_deferred_free_worker(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, free_work); - o = atomic_dec_return(&net->nr_outstanding_calls); - if (o == 0) - wake_up_var(&net->nr_outstanding_calls); - } + afs_free_call(call); } -static struct afs_call *afs_get_call(struct afs_call *call, - enum afs_call_trace why) +/* + * Dispose of a reference on a call, deferring the cleanup to a workqueue + * to avoid lock recursion. + */ +void afs_deferred_put_call(struct afs_call *call) { - int r; - - __refcount_inc(&call->ref, &r); + struct afs_net *net = call->net; + unsigned int debug_id = call->debug_id; + bool zero; + int r, o; - trace_afs_call(call->debug_id, why, r + 1, - atomic_read(&call->net->nr_outstanding_calls), + zero = __refcount_dec_and_test(&call->ref, &r); + o = atomic_read(&net->nr_outstanding_calls); + trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, __builtin_return_address(0)); - return call; + if (zero) + schedule_work(&call->free_work); } /* @@ -220,8 +261,6 @@ static struct afs_call *afs_get_call(struct afs_call *call, static void afs_queue_call_work(struct afs_call *call) { if (call->type->work) { - INIT_WORK(&call->work, call->type->work); - afs_get_call(call, afs_call_trace_work); if (!queue_work(afs_wq, &call->work)) afs_put_call(call); @@ -396,11 +435,16 @@ void afs_make_call(struct afs_call *call, gfp_t gfp) return; error_do_abort: - if (ret != -ECONNABORTED) { + if (ret != -ECONNABORTED) rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, afs_abort_send_data_error); - } else { + if (call->async) { + afs_see_call(call, afs_call_trace_async_abort); + return; + } + + if (ret == -ECONNABORTED) { len = 0; iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); rxrpc_kernel_recv_data(call->net->socket, rxcall, @@ -411,8 +455,10 @@ error_do_abort: call->error = ret; trace_afs_call_done(call); error_kill_call: - if (call->type->done) - call->type->done(call); + if (call->async) + afs_see_call(call, afs_call_trace_async_kill); + if (call->type->immediate_cancel) + call->type->immediate_cancel(call); /* We need to dispose of the extra ref we grabbed for an async call. * The call, however, might be queued on afs_async_calls and we need to @@ -467,7 +513,7 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort) /* * deliver messages to a call */ -static void afs_deliver_to_call(struct afs_call *call) +void afs_deliver_to_call(struct afs_call *call) { enum afs_call_state state; size_t len; @@ -568,7 +614,6 @@ local_abort: abort_code = 0; call_complete: afs_set_call_complete(call, ret, remote_abort); - state = AFS_CALL_COMPLETE; goto done; } @@ -640,7 +685,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, } /* - * wake up an asynchronous call + * Wake up an asynchronous call. The caller is holding the call notify + * spinlock around this, so we can't call afs_put_call(). */ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long call_user_ID) @@ -657,7 +703,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, __builtin_return_address(0)); if (!queue_work(afs_async_calls, &call->async_work)) - afs_put_call(call); + afs_deferred_put_call(call); } } @@ -711,7 +757,6 @@ void afs_charge_preallocation(struct work_struct *work) if (rxrpc_kernel_charge_accept(net->socket, afs_wake_up_async_call, - afs_rx_attach, (unsigned long)call, GFP_KERNEL, call->debug_id) < 0) @@ -739,8 +784,14 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { + struct afs_call *call = (struct afs_call *)user_call_ID; struct afs_net *net = afs_sock2net(sk); + call->peer = rxrpc_kernel_get_call_peer(sk->sk_socket, call->rxcall); + call->server = afs_find_server(call->peer); + if (!call->server) + trace_afs_cm_no_server(call, rxrpc_kernel_remote_srx(call->peer)); + queue_work(afs_wq, &net->charge_preallocation_work); } @@ -767,9 +818,14 @@ static int afs_deliver_cm_op_id(struct afs_call *call) if (!afs_cm_incoming_call(call)) return -ENOTSUPP; + call->security_ix = rxrpc_kernel_query_call_security(call->rxcall, + &call->service_id, + &call->enctype); + trace_afs_cb_call(call); + call->work.func = call->type->work; - /* pass responsibility for the remainer of this message off to the + /* pass responsibility for the remainder of this message off to the * cache manager op */ return call->type->deliver(call); } @@ -918,3 +974,13 @@ noinline int afs_protocol_error(struct afs_call *call, call->unmarshalling_error = true; return -EBADMSG; } + +/* + * Wake up OOB notification processing. + */ +static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob) +{ + struct afs_net *net = sk->sk_user_data; + + schedule_work(&net->rx_oob_work); +} diff --git a/fs/afs/server.c b/fs/afs/server.c index 038f9d0ae3af..a97562f831eb 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -14,188 +14,104 @@ static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ static atomic_t afs_server_debug_id; -static struct afs_server *afs_maybe_use_server(struct afs_server *, - enum afs_server_trace); static void __afs_put_server(struct afs_net *, struct afs_server *); +static void afs_server_timer(struct timer_list *timer); +static void afs_server_destroyer(struct work_struct *work); /* * Find a server by one of its addresses. */ -struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer *peer) +struct afs_server *afs_find_server(const struct rxrpc_peer *peer) { - const struct afs_endpoint_state *estate; - const struct afs_addr_list *alist; - struct afs_server *server = NULL; - unsigned int i; - int seq = 1; + struct afs_server *server = (struct afs_server *)rxrpc_kernel_get_peer_data(peer); - rcu_read_lock(); - - do { - if (server) - afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq); - server = NULL; - seq++; /* 2 on the 1st/lockless path, otherwise odd */ - read_seqbegin_or_lock(&net->fs_addr_lock, &seq); - - hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) { - estate = rcu_dereference(server->endpoint_state); - alist = estate->addresses; - for (i = 0; i < alist->nr_addrs; i++) - if (alist->addrs[i].peer == peer) - goto found; - } - - server = NULL; - continue; - found: - server = afs_maybe_use_server(server, afs_server_trace_get_by_addr); - - } while (need_seqretry(&net->fs_addr_lock, seq)); - - done_seqretry(&net->fs_addr_lock, seq); - - rcu_read_unlock(); - return server; + if (!server) + return NULL; + return afs_use_server(server, false, afs_server_trace_use_cm_call); } /* - * Look up a server by its UUID and mark it active. + * Look up a server by its UUID and mark it active. The caller must hold + * cell->fs_lock. */ -struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) +static struct afs_server *afs_find_server_by_uuid(struct afs_cell *cell, const uuid_t *uuid) { - struct afs_server *server = NULL; + struct afs_server *server; struct rb_node *p; - int diff, seq = 1; + int diff; _enter("%pU", uuid); - do { - /* Unfortunately, rbtree walking doesn't give reliable results - * under just the RCU read lock, so we have to check for - * changes. - */ - if (server) - afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq); - server = NULL; - seq++; /* 2 on the 1st/lockless path, otherwise odd */ - read_seqbegin_or_lock(&net->fs_lock, &seq); - - p = net->fs_servers.rb_node; - while (p) { - server = rb_entry(p, struct afs_server, uuid_rb); - - diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); - if (diff < 0) { - p = p->rb_left; - } else if (diff > 0) { - p = p->rb_right; - } else { - afs_use_server(server, afs_server_trace_get_by_uuid); - break; - } - - server = NULL; - } - } while (need_seqretry(&net->fs_lock, seq)); + p = cell->fs_servers.rb_node; + while (p) { + server = rb_entry(p, struct afs_server, uuid_rb); - done_seqretry(&net->fs_lock, seq); + diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); + if (diff < 0) { + p = p->rb_left; + } else if (diff > 0) { + p = p->rb_right; + } else { + if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) + return NULL; /* Need a write lock */ + afs_use_server(server, true, afs_server_trace_use_by_uuid); + return server; + } + } - _leave(" = %p", server); - return server; + return NULL; } /* - * Install a server record in the namespace tree. If there's a clash, we stick - * it into a list anchored on whichever afs_server struct is actually in the - * tree. + * Install a server record in the cell tree. The caller must hold an exclusive + * lock on cell->fs_lock. */ static struct afs_server *afs_install_server(struct afs_cell *cell, - struct afs_server *candidate) + struct afs_server **candidate) { - const struct afs_endpoint_state *estate; - const struct afs_addr_list *alist; - struct afs_server *server, *next; + struct afs_server *server; struct afs_net *net = cell->net; struct rb_node **pp, *p; int diff; _enter("%p", candidate); - write_seqlock(&net->fs_lock); - /* Firstly install the server in the UUID lookup tree */ - pp = &net->fs_servers.rb_node; + pp = &cell->fs_servers.rb_node; p = NULL; while (*pp) { p = *pp; _debug("- consider %p", p); server = rb_entry(p, struct afs_server, uuid_rb); - diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); - if (diff < 0) { + diff = memcmp(&(*candidate)->uuid, &server->uuid, sizeof(uuid_t)); + if (diff < 0) pp = &(*pp)->rb_left; - } else if (diff > 0) { + else if (diff > 0) pp = &(*pp)->rb_right; - } else { - if (server->cell == cell) - goto exists; - - /* We have the same UUID representing servers in - * different cells. Append the new server to the list. - */ - for (;;) { - next = rcu_dereference_protected( - server->uuid_next, - lockdep_is_held(&net->fs_lock.lock)); - if (!next) - break; - server = next; - } - rcu_assign_pointer(server->uuid_next, candidate); - candidate->uuid_prev = server; - server = candidate; - goto added_dup; - } + else + goto exists; } - server = candidate; + server = *candidate; + *candidate = NULL; rb_link_node(&server->uuid_rb, p, pp); - rb_insert_color(&server->uuid_rb, &net->fs_servers); + rb_insert_color(&server->uuid_rb, &cell->fs_servers); + write_seqlock(&net->fs_lock); hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + write_sequnlock(&net->fs_lock); -added_dup: - write_seqlock(&net->fs_addr_lock); - estate = rcu_dereference_protected(server->endpoint_state, - lockdep_is_held(&net->fs_addr_lock.lock)); - alist = estate->addresses; - - /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install - * it in the IPv4 and/or IPv6 reverse-map lists. - * - * TODO: For speed we want to use something other than a flat list - * here; even sorting the list in terms of lowest address would help a - * bit, but anything we might want to do gets messy and memory - * intensive. - */ - if (alist->nr_addrs > 0) - hlist_add_head_rcu(&server->addr_link, &net->fs_addresses); - - write_sequnlock(&net->fs_addr_lock); + afs_get_cell(cell, afs_cell_trace_get_server); exists: - afs_get_server(server, afs_server_trace_get_install); - write_sequnlock(&net->fs_lock); + afs_use_server(server, true, afs_server_trace_use_install); return server; } /* - * Allocate a new server record and mark it active. + * Allocate a new server record and mark it as active but uncreated. */ -static struct afs_server *afs_alloc_server(struct afs_cell *cell, - const uuid_t *uuid, - struct afs_addr_list *alist) +static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *uuid) { - struct afs_endpoint_state *estate; struct afs_server *server; struct afs_net *net = cell->net; @@ -203,65 +119,50 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); if (!server) - goto enomem; - - estate = kzalloc(sizeof(struct afs_endpoint_state), GFP_KERNEL); - if (!estate) - goto enomem_server; + return NULL; refcount_set(&server->ref, 1); - atomic_set(&server->active, 1); + atomic_set(&server->active, 0); + __set_bit(AFS_SERVER_FL_UNCREATED, &server->flags); server->debug_id = atomic_inc_return(&afs_server_debug_id); - server->addr_version = alist->version; server->uuid = *uuid; rwlock_init(&server->fs_lock); + INIT_WORK(&server->destroyer, &afs_server_destroyer); + timer_setup(&server->timer, afs_server_timer, 0); INIT_LIST_HEAD(&server->volumes); init_waitqueue_head(&server->probe_wq); + mutex_init(&server->cm_token_lock); INIT_LIST_HEAD(&server->probe_link); + INIT_HLIST_NODE(&server->proc_link); spin_lock_init(&server->probe_lock); server->cell = cell; server->rtt = UINT_MAX; server->service_id = FS_SERVICE; - server->probe_counter = 1; server->probed_at = jiffies - LONG_MAX / 2; - refcount_set(&estate->ref, 1); - estate->addresses = alist; - estate->server_id = server->debug_id; - estate->probe_seq = 1; - rcu_assign_pointer(server->endpoint_state, estate); afs_inc_servers_outstanding(net); - trace_afs_server(server->debug_id, 1, 1, afs_server_trace_alloc); - trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref), - afs_estate_trace_alloc_server); _leave(" = %p", server); return server; - -enomem_server: - kfree(server); -enomem: - _leave(" = NULL [nomem]"); - return NULL; } /* * Look up an address record for a server */ -static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, - struct key *key, const uuid_t *uuid) +static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_server *server, + struct key *key) { struct afs_vl_cursor vc; struct afs_addr_list *alist = NULL; int ret; ret = -ERESTARTSYS; - if (afs_begin_vlserver_operation(&vc, cell, key)) { + if (afs_begin_vlserver_operation(&vc, server->cell, key)) { while (afs_select_vlserver(&vc)) { if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) - alist = afs_yfsvl_get_endpoints(&vc, uuid); + alist = afs_yfsvl_get_endpoints(&vc, &server->uuid); else - alist = afs_vl_get_addrs_u(&vc, uuid); + alist = afs_vl_get_addrs_u(&vc, &server->uuid); } ret = afs_end_vlserver_operation(&vc); @@ -271,72 +172,122 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, } /* - * Get or create a fileserver record. + * Get or create a fileserver record and return it with an active-use count on + * it. */ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, const uuid_t *uuid, u32 addr_version) { - struct afs_addr_list *alist; - struct afs_server *server, *candidate; + struct afs_addr_list *alist = NULL; + struct afs_server *server, *candidate = NULL; + bool creating = false; + int ret; _enter("%p,%pU", cell->net, uuid); - server = afs_find_server_by_uuid(cell->net, uuid); + down_read(&cell->fs_lock); + server = afs_find_server_by_uuid(cell, uuid); + /* Won't see servers marked uncreated. */ + up_read(&cell->fs_lock); + if (server) { + timer_delete_sync(&server->timer); + if (test_bit(AFS_SERVER_FL_CREATING, &server->flags)) + goto wait_for_creation; if (server->addr_version != addr_version) set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags); return server; } - alist = afs_vl_lookup_addrs(cell, key, uuid); - if (IS_ERR(alist)) - return ERR_CAST(alist); - - candidate = afs_alloc_server(cell, uuid, alist); + candidate = afs_alloc_server(cell, uuid); if (!candidate) { afs_put_addrlist(alist, afs_alist_trace_put_server_oom); return ERR_PTR(-ENOMEM); } - server = afs_install_server(cell, candidate); - if (server != candidate) { - afs_put_addrlist(alist, afs_alist_trace_put_server_dup); + down_write(&cell->fs_lock); + server = afs_install_server(cell, &candidate); + if (test_bit(AFS_SERVER_FL_CREATING, &server->flags)) { + /* We need to wait for creation to complete. */ + up_write(&cell->fs_lock); + goto wait_for_creation; + } + if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) { + set_bit(AFS_SERVER_FL_CREATING, &server->flags); + clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags); + creating = true; + } + up_write(&cell->fs_lock); + timer_delete_sync(&server->timer); + + /* If we get to create the server, we look up the addresses and then + * immediately dispatch an asynchronous probe to each interface on the + * fileserver. This will make sure the repeat-probing service is + * started. + */ + if (creating) { + alist = afs_vl_lookup_addrs(server, key); + if (IS_ERR(alist)) { + ret = PTR_ERR(alist); + goto create_failed; + } + + ret = afs_fs_probe_fileserver(cell->net, server, alist, key); + if (ret) + goto create_failed; + + clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags); + } + +out: + afs_put_addrlist(alist, afs_alist_trace_put_server_create); + if (candidate) { + kfree(rcu_access_pointer(server->endpoint_state)); kfree(candidate); - } else { - /* Immediately dispatch an asynchronous probe to each interface - * on the fileserver. This will make sure the repeat-probing - * service is started. - */ - afs_fs_probe_fileserver(cell->net, server, alist, key); + afs_dec_servers_outstanding(cell->net); + } + return server ?: ERR_PTR(ret); + +wait_for_creation: + afs_see_server(server, afs_server_trace_wait_create); + wait_on_bit(&server->flags, AFS_SERVER_FL_CREATING, TASK_UNINTERRUPTIBLE); + if (test_bit_acquire(AFS_SERVER_FL_UNCREATED, &server->flags)) { + /* Barrier: read flag before error */ + ret = READ_ONCE(server->create_error); + afs_put_server(cell->net, server, afs_server_trace_unuse_create_fail); + server = NULL; + goto out; } - return server; -} + ret = 0; + goto out; -/* - * Set the server timer to fire after a given delay, assuming it's not already - * set for an earlier time. - */ -static void afs_set_server_timer(struct afs_net *net, time64_t delay) -{ - if (net->live) { - afs_inc_servers_outstanding(net); - if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) - afs_dec_servers_outstanding(net); +create_failed: + down_write(&cell->fs_lock); + + WRITE_ONCE(server->create_error, ret); + smp_wmb(); /* Barrier: set error before flag. */ + set_bit(AFS_SERVER_FL_UNCREATED, &server->flags); + + clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags); + + if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) { + clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags); + creating = true; } + afs_unuse_server(cell->net, server, afs_server_trace_unuse_create_fail); + server = NULL; + + up_write(&cell->fs_lock); + goto out; } /* - * Server management timer. We have an increment on fs_outstanding that we - * need to pass along to the work item. + * Set/reduce a server's timer. */ -void afs_servers_timer(struct timer_list *timer) +static void afs_set_server_timer(struct afs_server *server, unsigned int delay_secs) { - struct afs_net *net = container_of(timer, struct afs_net, fs_timer); - - _enter(""); - if (!queue_work(afs_wq, &net->fs_manager)) - afs_dec_servers_outstanding(net); + mod_timer(&server->timer, jiffies + delay_secs * HZ); } /* @@ -355,32 +306,20 @@ struct afs_server *afs_get_server(struct afs_server *server, } /* - * Try to get a reference on a server object. + * Get an active count on a server object and maybe remove from the inactive + * list. */ -static struct afs_server *afs_maybe_use_server(struct afs_server *server, - enum afs_server_trace reason) -{ - unsigned int a; - int r; - - if (!__refcount_inc_not_zero(&server->ref, &r)) - return NULL; - - a = atomic_inc_return(&server->active); - trace_afs_server(server->debug_id, r + 1, a, reason); - return server; -} - -/* - * Get an active count on a server object. - */ -struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason) +struct afs_server *afs_use_server(struct afs_server *server, bool activate, + enum afs_server_trace reason) { unsigned int a; int r; __refcount_inc(&server->ref, &r); a = atomic_inc_return(&server->active); + if (a == 1 && activate && + !test_bit(AFS_SERVER_FL_EXPIRED, &server->flags)) + timer_delete(&server->timer); trace_afs_server(server->debug_id, r + 1, a, reason); return server; @@ -413,13 +352,16 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server, enum afs_server_trace reason) { - if (server) { - unsigned int active = atomic_dec_return(&server->active); + if (!server) + return; - if (active == 0) - afs_set_server_timer(net, afs_server_gc_delay); - afs_put_server(net, server, reason); + if (atomic_dec_and_test(&server->active)) { + if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) || + READ_ONCE(server->cell->state) >= AFS_CELL_REMOVING) + schedule_work(&server->destroyer); } + + afs_put_server(net, server, reason); } /* @@ -428,10 +370,22 @@ void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server, void afs_unuse_server(struct afs_net *net, struct afs_server *server, enum afs_server_trace reason) { - if (server) { - server->unuse_time = ktime_get_real_seconds(); - afs_unuse_server_notime(net, server, reason); + if (!server) + return; + + if (atomic_dec_and_test(&server->active)) { + if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) && + READ_ONCE(server->cell->state) < AFS_CELL_REMOVING) { + time64_t unuse_time = ktime_get_real_seconds(); + + server->unuse_time = unuse_time; + afs_set_server_timer(server, afs_server_gc_delay); + } else { + schedule_work(&server->destroyer); + } } + + afs_put_server(net, server, reason); } static void afs_server_rcu(struct rcu_head *rcu) @@ -442,6 +396,8 @@ static void afs_server_rcu(struct rcu_head *rcu) atomic_read(&server->active), afs_server_trace_free); afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state), afs_estate_trace_put_server); + afs_put_cell(server->cell, afs_cell_trace_put_server); + kfree(server->cm_rxgk_appdata.data); kfree(server); } @@ -460,159 +416,119 @@ static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server } /* - * destroy a dead server + * Check to see if the server record has expired. */ -static void afs_destroy_server(struct afs_net *net, struct afs_server *server) +static bool afs_has_server_expired(const struct afs_server *server) { - if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) - afs_give_up_callbacks(net, server); + time64_t expires_at; - afs_put_server(net, server, afs_server_trace_destroy); + if (atomic_read(&server->active)) + return false; + + if (server->cell->net->live || + server->cell->state >= AFS_CELL_REMOVING) { + trace_afs_server(server->debug_id, refcount_read(&server->ref), + 0, afs_server_trace_purging); + return true; + } + + expires_at = server->unuse_time; + if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && + !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) + expires_at += afs_server_gc_delay; + + return ktime_get_real_seconds() > expires_at; } /* - * Garbage collect any expired servers. + * Remove a server record from it's parent cell's database. */ -static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) +static bool afs_remove_server_from_cell(struct afs_server *server) { - struct afs_server *server, *next, *prev; - int active; - - while ((server = gc_list)) { - gc_list = server->gc_next; - - write_seqlock(&net->fs_lock); - - active = atomic_read(&server->active); - if (active == 0) { - trace_afs_server(server->debug_id, refcount_read(&server->ref), - active, afs_server_trace_gc); - next = rcu_dereference_protected( - server->uuid_next, lockdep_is_held(&net->fs_lock.lock)); - prev = server->uuid_prev; - if (!prev) { - /* The one at the front is in the tree */ - if (!next) { - rb_erase(&server->uuid_rb, &net->fs_servers); - } else { - rb_replace_node_rcu(&server->uuid_rb, - &next->uuid_rb, - &net->fs_servers); - next->uuid_prev = NULL; - } - } else { - /* This server is not at the front */ - rcu_assign_pointer(prev->uuid_next, next); - if (next) - next->uuid_prev = prev; - } - - list_del(&server->probe_link); - hlist_del_rcu(&server->proc_link); - if (!hlist_unhashed(&server->addr_link)) - hlist_del_rcu(&server->addr_link); - } - write_sequnlock(&net->fs_lock); + struct afs_cell *cell = server->cell; + + down_write(&cell->fs_lock); - if (active == 0) - afs_destroy_server(net, server); + if (!afs_has_server_expired(server)) { + up_write(&cell->fs_lock); + return false; } + + set_bit(AFS_SERVER_FL_EXPIRED, &server->flags); + _debug("expire %pU %u", &server->uuid, atomic_read(&server->active)); + afs_see_server(server, afs_server_trace_see_expired); + rb_erase(&server->uuid_rb, &cell->fs_servers); + up_write(&cell->fs_lock); + return true; } -/* - * Manage the records of servers known to be within a network namespace. This - * includes garbage collecting unused servers. - * - * Note also that we were given an increment on net->servers_outstanding by - * whoever queued us that we need to deal with before returning. - */ -void afs_manage_servers(struct work_struct *work) +static void afs_server_destroyer(struct work_struct *work) { - struct afs_net *net = container_of(work, struct afs_net, fs_manager); - struct afs_server *gc_list = NULL; - struct rb_node *cursor; - time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; - bool purging = !net->live; - - _enter(""); + struct afs_endpoint_state *estate; + struct afs_server *server = container_of(work, struct afs_server, destroyer); + struct afs_net *net = server->cell->net; - /* Trawl the server list looking for servers that have expired from - * lack of use. - */ - read_seqlock_excl(&net->fs_lock); + afs_see_server(server, afs_server_trace_see_destroyer); - for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { - struct afs_server *server = - rb_entry(cursor, struct afs_server, uuid_rb); - int active = atomic_read(&server->active); + if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags)) + return; - _debug("manage %pU %u", &server->uuid, active); + if (!afs_remove_server_from_cell(server)) + return; - if (purging) { - trace_afs_server(server->debug_id, refcount_read(&server->ref), - active, afs_server_trace_purging); - if (active != 0) - pr_notice("Can't purge s=%08x\n", server->debug_id); - } + timer_shutdown_sync(&server->timer); + cancel_work(&server->destroyer); - if (active == 0) { - time64_t expire_at = server->unuse_time; - - if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && - !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) - expire_at += afs_server_gc_delay; - if (purging || expire_at <= now) { - server->gc_next = gc_list; - gc_list = server; - } else if (expire_at < next_manage) { - next_manage = expire_at; - } - } - } + if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) + afs_give_up_callbacks(net, server); - read_sequnlock_excl(&net->fs_lock); + /* Unbind the rxrpc_peer records from the server. */ + estate = rcu_access_pointer(server->endpoint_state); + if (estate) + afs_set_peer_appdata(server, estate->addresses, NULL); - /* Update the timer on the way out. We have to pass an increment on - * servers_outstanding in the namespace that we are in to the timer or - * the work scheduler. - */ - if (!purging && next_manage < TIME64_MAX) { - now = ktime_get_real_seconds(); + write_seqlock(&net->fs_lock); + list_del_init(&server->probe_link); + if (!hlist_unhashed(&server->proc_link)) + hlist_del_rcu(&server->proc_link); + write_sequnlock(&net->fs_lock); - if (next_manage - now <= 0) { - if (queue_work(afs_wq, &net->fs_manager)) - afs_inc_servers_outstanding(net); - } else { - afs_set_server_timer(net, next_manage - now); - } - } + afs_put_server(net, server, afs_server_trace_destroy); +} - afs_gc_servers(net, gc_list); +static void afs_server_timer(struct timer_list *timer) +{ + struct afs_server *server = container_of(timer, struct afs_server, timer); - afs_dec_servers_outstanding(net); - _leave(" [%d]", atomic_read(&net->servers_outstanding)); + afs_see_server(server, afs_server_trace_see_timer); + if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags)) + schedule_work(&server->destroyer); } -static void afs_queue_server_manager(struct afs_net *net) +/* + * Wake up all the servers in a cell so that they can purge themselves. + */ +void afs_purge_servers(struct afs_cell *cell) { - afs_inc_servers_outstanding(net); - if (!queue_work(afs_wq, &net->fs_manager)) - afs_dec_servers_outstanding(net); + struct afs_server *server; + struct rb_node *rb; + + down_read(&cell->fs_lock); + for (rb = rb_first(&cell->fs_servers); rb; rb = rb_next(rb)) { + server = rb_entry(rb, struct afs_server, uuid_rb); + afs_see_server(server, afs_server_trace_see_purge); + schedule_work(&server->destroyer); + } + up_read(&cell->fs_lock); } /* - * Purge list of servers. + * Wait for outstanding servers. */ -void afs_purge_servers(struct afs_net *net) +void afs_wait_for_servers(struct afs_net *net) { _enter(""); - if (del_timer_sync(&net->fs_timer)) - afs_dec_servers_outstanding(net); - - afs_queue_server_manager(net); - - _debug("wait"); atomic_dec(&net->servers_outstanding); wait_var_event(&net->servers_outstanding, !atomic_read(&net->servers_outstanding)); @@ -636,7 +552,7 @@ static noinline bool afs_update_server_record(struct afs_operation *op, atomic_read(&server->active), afs_server_trace_update); - alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid); + alist = afs_vl_lookup_addrs(server, op->key); if (IS_ERR(alist)) { rcu_read_lock(); estate = rcu_dereference(server->endpoint_state); diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 7e7e567a7f8a..20d5474837df 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -16,7 +16,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) if (slist && refcount_dec_and_test(&slist->usage)) { for (i = 0; i < slist->nr_servers; i++) afs_unuse_server(net, slist->servers[i].server, - afs_server_trace_put_slist); + afs_server_trace_unuse_slist); kfree_rcu(slist, rcu); } } @@ -97,8 +97,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { - afs_put_server(volume->cell->net, server, - afs_server_trace_put_slist_isort); + afs_unuse_server_notime(volume->cell->net, server, + afs_server_trace_unuse_slist_isort); continue; } diff --git a/fs/afs/super.c b/fs/afs/super.c index f3ba1c3e72f5..25b306db6992 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -194,8 +194,6 @@ static int afs_show_options(struct seq_file *m, struct dentry *root) if (as->dyn_root) seq_puts(m, ",dyn"); - if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) - seq_puts(m, ",autocell"); switch (as->flock_mode) { case afs_flock_mode_unset: break; case afs_flock_mode_local: p = "local"; break; @@ -292,13 +290,14 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) /* lookup the cell record */ if (cellname) { cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, - NULL, false); + NULL, false, + afs_cell_trace_use_lookup_mount); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse); + afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_parse); afs_see_cell(cell, afs_cell_trace_see_source); ctx->cell = cell; } @@ -395,7 +394,7 @@ static int afs_validate_fc(struct fs_context *fc) ctx->key = NULL; cell = afs_use_cell(ctx->cell->alias_of, afs_cell_trace_use_fc_alias); - afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); + afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc); ctx->cell = cell; goto reget_key; } @@ -468,7 +467,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) /* allocate the root inode and dentry */ if (as->dyn_root) { - inode = afs_iget_pseudo_dir(sb, true); + inode = afs_dynroot_iget_root(sb); } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); @@ -478,9 +477,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) if (IS_ERR(inode)) return PTR_ERR(inode); - if (ctx->autocell || as->dyn_root) - set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); - ret = -ENOMEM; sb->s_root = d_make_root(inode); if (!sb->s_root) @@ -488,9 +484,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) if (as->dyn_root) { sb->s_d_op = &afs_dynroot_dentry_operations; - ret = afs_dynroot_populate(sb); - if (ret < 0) - goto error; } else { sb->s_d_op = &afs_fs_dentry_operations; rcu_assign_pointer(as->volume->sb, sb); @@ -527,9 +520,8 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { - struct afs_net *net = afs_net(as->net_ns); afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi); - afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi); + afs_unuse_cell(as->cell, afs_cell_trace_unuse_sbi); put_net(as->net_ns); kfree(as); } @@ -539,9 +531,6 @@ static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); - if (as->dyn_root) - afs_dynroot_depopulate(sb); - /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ @@ -615,7 +604,7 @@ static void afs_free_fc(struct fs_context *fc) afs_destroy_sbi(fc->s_fs_info); afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc); - afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); + afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc); key_put(ctx->key); kfree(ctx); } @@ -663,7 +652,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->netfs.inode); - mutex_init(&vnode->io_lock); + INIT_LIST_HEAD(&vnode->io_lock_waiters); init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); @@ -696,6 +685,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb) vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; + vnode->directory = NULL; + vnode->directory_size = 0; vnode->flags = 1 << AFS_VNODE_UNSET; vnode->lock_state = AFS_VNODE_LOCK_NONE; diff --git a/fs/afs/validation.c b/fs/afs/validation.c index 32a53fc8dfb2..0ba8336c9025 100644 --- a/fs/afs/validation.c +++ b/fs/afs/validation.c @@ -120,22 +120,31 @@ bool afs_check_validity(const struct afs_vnode *vnode) { const struct afs_volume *volume = vnode->volume; + enum afs_vnode_invalid_trace trace = afs_vnode_valid_trace; + time64_t cb_expires_at = atomic64_read(&vnode->cb_expires_at); time64_t deadline = ktime_get_real_seconds() + 10; if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) return true; - if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) || - atomic64_read(&vnode->cb_expires_at) <= deadline || - volume->cb_expires_at <= deadline || - vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot) || - vnode->cb_scrub != atomic_read(&volume->cb_scrub) || - test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - _debug("inval"); - return false; - } - - return true; + if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break)) + trace = afs_vnode_invalid_trace_cb_v_break; + else if (cb_expires_at == AFS_NO_CB_PROMISE) + trace = afs_vnode_invalid_trace_no_cb_promise; + else if (cb_expires_at <= deadline) + trace = afs_vnode_invalid_trace_expired; + else if (volume->cb_expires_at <= deadline) + trace = afs_vnode_invalid_trace_vol_expired; + else if (vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot)) + trace = afs_vnode_invalid_trace_cb_ro_snapshot; + else if (vnode->cb_scrub != atomic_read(&volume->cb_scrub)) + trace = afs_vnode_invalid_trace_cb_scrub; + else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + trace = afs_vnode_invalid_trace_zap_data; + else + return true; + trace_afs_vnode_invalid(vnode, trace); + return false; } /* @@ -365,9 +374,9 @@ static void afs_zap_data(struct afs_vnode *vnode) * written back in a regular file and completely discard the pages in a * directory or symlink */ if (S_ISREG(vnode->netfs.inode.i_mode)) - invalidate_remote_inode(&vnode->netfs.inode); + filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX); else - invalidate_inode_pages2(vnode->netfs.inode.i_mapping); + filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX); } /* diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 9f36e14f1c2d..709b4cdb723e 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -205,11 +205,11 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key) goto is_alias; if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { - afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); + afs_unuse_cell(p, afs_cell_trace_unuse_check_alias); return -ERESTARTSYS; } - afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); + afs_unuse_cell(p, afs_cell_trace_unuse_check_alias); } mutex_unlock(&cell->net->proc_cells_lock); @@ -253,6 +253,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key) static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) { struct afs_cell *master; + size_t name_len; char *cell_name; cell_name = afs_vl_get_cell_name(cell, key); @@ -264,8 +265,12 @@ static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) return 0; } - master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name), - NULL, false); + name_len = strlen(cell_name); + if (!name_len || name_len > AFS_MAXCELLNAME) + master = ERR_PTR(-EOPNOTSUPP); + else + master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, false, + afs_cell_trace_use_lookup_canonical); kfree(cell_name); if (IS_ERR(master)) return PTR_ERR(master); diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index d8f79f6ada3d..6ad9688d8f4b 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -48,7 +48,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) cell->dns_expiry <= ktime_get_real_seconds()) { dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); - afs_queue_cell(cell, afs_cell_trace_get_queue_dns); + afs_queue_cell(cell, afs_cell_trace_queue_dns); if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { if (wait_var_event_interruptible( diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index cac75f89b64a..3a23c0b08eb6 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -370,6 +370,7 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { .name = "VL.GetCapabilities", .op = afs_VL_GetCapabilities, .deliver = afs_deliver_vl_get_capabilities, + .immediate_cancel = afs_vlserver_probe_result, .done = afs_vlserver_probe_result, .destructor = afs_destroy_vl_get_capabilities, }; @@ -697,7 +698,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call) return ret; namesz = ntohl(call->tmp); - if (namesz > AFS_MAXCELLNAME) + if (namesz > YFS_VL_MAXCELLNAME) return afs_protocol_error(call, afs_eproto_cellname_len); paddedsz = (namesz + 3) & ~3; call->count = namesz; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index af3a3f57c1b3..0efff3d25133 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -10,6 +10,7 @@ #include "internal.h" static unsigned __read_mostly afs_volume_record_life = 60 * 60; +static atomic_t afs_volume_debug_id; static void afs_destroy_volume(struct work_struct *work); @@ -59,7 +60,7 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume) struct afs_cell *cell = volume->cell; if (!hlist_unhashed(&volume->proc_link)) { - trace_afs_volume(volume->vid, refcount_read(&cell->ref), + trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref), afs_volume_trace_remove); write_seqlock(&cell->volume_lock); hlist_del_rcu(&volume->proc_link); @@ -84,6 +85,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, if (!volume) goto error_0; + volume->debug_id = atomic_inc_return(&afs_volume_debug_id); volume->vid = vldb->vid[params->type]; volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); @@ -115,7 +117,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, *_slist = slist; rcu_assign_pointer(volume->servers, slist); - trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc); + trace_afs_volume(volume->debug_id, volume->vid, 1, afs_volume_trace_alloc); return volume; error_1: @@ -247,7 +249,7 @@ static void afs_destroy_volume(struct work_struct *work) afs_remove_volume_from_cell(volume); afs_put_serverlist(volume->cell->net, slist); afs_put_cell(volume->cell, afs_cell_trace_put_vol); - trace_afs_volume(volume->vid, refcount_read(&volume->ref), + trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref), afs_volume_trace_free); kfree_rcu(volume, rcu); @@ -262,7 +264,7 @@ bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason) int r; if (__refcount_inc_not_zero(&volume->ref, &r)) { - trace_afs_volume(volume->vid, r + 1, reason); + trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason); return true; } return false; @@ -278,7 +280,7 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume, int r; __refcount_inc(&volume->ref, &r); - trace_afs_volume(volume->vid, r + 1, reason); + trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason); } return volume; } @@ -290,12 +292,13 @@ struct afs_volume *afs_get_volume(struct afs_volume *volume, void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason) { if (volume) { + unsigned int debug_id = volume->debug_id; afs_volid_t vid = volume->vid; bool zero; int r; zero = __refcount_dec_and_test(&volume->ref, &r); - trace_afs_volume(vid, r - 1, reason); + trace_afs_volume(debug_id, vid, r - 1, reason); if (zero) schedule_work(&volume->destructor); } diff --git a/fs/afs/write.c b/fs/afs/write.c index 74402d95a884..18b0a9f1615e 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -29,43 +29,39 @@ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsign /* * Find a key to use for the writeback. We cached the keys used to author the - * writes on the vnode. *_wbk will contain the last writeback key used or NULL - * and we need to start from there if it's set. + * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key + * record used or NULL and we need to start from there if it's set. + * wreq->netfs_priv will be set to the key itself or NULL. */ -static int afs_get_writeback_key(struct afs_vnode *vnode, - struct afs_wb_key **_wbk) +static void afs_get_writeback_key(struct netfs_io_request *wreq) { - struct afs_wb_key *wbk = NULL; - struct list_head *p; - int ret = -ENOKEY, ret2; + struct afs_wb_key *wbk, *old = wreq->netfs_priv2; + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); + + key_put(wreq->netfs_priv); + wreq->netfs_priv = NULL; + wreq->netfs_priv2 = NULL; spin_lock(&vnode->wb_lock); - if (*_wbk) - p = (*_wbk)->vnode_link.next; + if (old) + wbk = list_next_entry(old, vnode_link); else - p = vnode->wb_keys.next; + wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link); - while (p != &vnode->wb_keys) { - wbk = list_entry(p, struct afs_wb_key, vnode_link); + list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) { _debug("wbk %u", key_serial(wbk->key)); - ret2 = key_validate(wbk->key); - if (ret2 == 0) { + if (key_validate(wbk->key) == 0) { refcount_inc(&wbk->usage); + wreq->netfs_priv = key_get(wbk->key); + wreq->netfs_priv2 = wbk; _debug("USE WB KEY %u", key_serial(wbk->key)); break; } - - wbk = NULL; - if (ret == -ENOKEY) - ret = ret2; - p = p->next; } spin_unlock(&vnode->wb_lock); - if (*_wbk) - afs_put_wb_key(*_wbk); - *_wbk = wbk; - return 0; + + afs_put_wb_key(old); } static void afs_store_data_success(struct afs_operation *op) @@ -75,8 +71,7 @@ static void afs_store_data_success(struct afs_operation *op) op->ctime = op->file[0].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[0]); if (!afs_op_error(op)) { - if (!op->store.laundering) - afs_pages_written_back(vnode, op->store.pos, op->store.size); + afs_pages_written_back(vnode, op->store.pos, op->store.size); afs_stat_v(vnode, n_stores); atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes); } @@ -89,113 +84,142 @@ static const struct afs_operation_ops afs_store_data_operation = { }; /* - * write to a file + * Prepare a subrequest to write to the server. This sets the max_len + * parameter. + */ +void afs_prepare_write(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_stream *stream = &subreq->rreq->io_streams[subreq->stream_nr]; + + //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) + // subreq->max_len = 512 * 1024; + //else + stream->sreq_max_len = 256 * 1024 * 1024; +} + +/* + * Issue a subrequest to write to the server. */ -static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos, - bool laundering) +static void afs_issue_write_worker(struct work_struct *work) { + struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work); + struct netfs_io_request *wreq = subreq->rreq; struct afs_operation *op; - struct afs_wb_key *wbk = NULL; - loff_t size = iov_iter_count(iter); + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); + unsigned long long pos = subreq->start + subreq->transferred; + size_t len = subreq->len - subreq->transferred; int ret = -ENOKEY; - _enter("%s{%llx:%llu.%u},%llx,%llx", + _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx", + wreq->debug_id, subreq->debug_index, vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - size, pos); + pos, len); - ret = afs_get_writeback_key(vnode, &wbk); - if (ret) { - _leave(" = %d [no keys]", ret); - return ret; - } +#if 0 // Error injection + if (subreq->debug_index == 3) + return netfs_write_subrequest_terminated(subreq, -ENOANO, false); - op = afs_alloc_operation(wbk->key, vnode->volume); - if (IS_ERR(op)) { - afs_put_wb_key(wbk); - return -ENOMEM; + if (!subreq->retry_count) { + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); + return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); } +#endif + + op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); + if (IS_ERR(op)) + return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); afs_op_set_vnode(op, 0, vnode); - op->file[0].dv_delta = 1; + op->file[0].dv_delta = 1; op->file[0].modification = true; - op->store.pos = pos; - op->store.size = size; - op->store.laundering = laundering; - op->flags |= AFS_OPERATION_UNINTR; - op->ops = &afs_store_data_operation; + op->store.pos = pos; + op->store.size = len; + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_store_data_operation; -try_next_key: afs_begin_vnode_operation(op); - op->store.write_iter = iter; - op->store.i_size = max(pos + size, vnode->netfs.remote_i_size); - op->mtime = inode_get_mtime(&vnode->netfs.inode); + op->store.write_iter = &subreq->io_iter; + op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size); + op->mtime = inode_get_mtime(&vnode->netfs.inode); afs_wait_for_operation(op); - - switch (afs_op_error(op)) { + ret = afs_put_operation(op); + switch (ret) { + case 0: + __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); + break; case -EACCES: case -EPERM: case -ENOKEY: case -EKEYEXPIRED: case -EKEYREJECTED: case -EKEYREVOKED: - _debug("next"); - - ret = afs_get_writeback_key(vnode, &wbk); - if (ret == 0) { - key_put(op->key); - op->key = key_get(wbk->key); - goto try_next_key; - } + /* If there are more keys we can try, use the retry algorithm + * to rotate the keys. + */ + if (wreq->netfs_priv2) + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); break; } - afs_put_wb_key(wbk); - _leave(" = %d", afs_op_error(op)); - return afs_put_operation(op); + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); } -static void afs_upload_to_server(struct netfs_io_subrequest *subreq) +void afs_issue_write(struct netfs_io_subrequest *subreq) { - struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); - ssize_t ret; - - _enter("%x[%x],%zx", - subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count); - - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); - ret = afs_store_data(vnode, &subreq->io_iter, subreq->start, - subreq->rreq->origin == NETFS_LAUNDER_WRITE); - netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, - false); + subreq->work.func = afs_issue_write_worker; + if (!queue_work(system_unbound_wq, &subreq->work)) + WARN_ON_ONCE(1); } -static void afs_upload_to_server_worker(struct work_struct *work) +/* + * Writeback calls this when it finds a folio that needs uploading. This isn't + * called if writeback only has copy-to-cache to deal with. + */ +void afs_begin_writeback(struct netfs_io_request *wreq) { - struct netfs_io_subrequest *subreq = - container_of(work, struct netfs_io_subrequest, work); - - afs_upload_to_server(subreq); + if (S_ISREG(wreq->inode->i_mode)) + afs_get_writeback_key(wreq); } /* - * Set up write requests for a writeback slice. We need to add a write request - * for each write we want to make. + * Prepare to retry the writes in request. Use this to try rotating the + * available writeback keys. */ -void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) +void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream) { - struct netfs_io_subrequest *subreq; - - _enter("%x,%llx-%llx", wreq->debug_id, start, start + len); + struct netfs_io_subrequest *subreq = + list_first_entry(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); + + switch (wreq->origin) { + case NETFS_READAHEAD: + case NETFS_READPAGE: + case NETFS_READ_GAPS: + case NETFS_READ_SINGLE: + case NETFS_READ_FOR_WRITE: + case NETFS_DIO_READ: + return; + default: + break; + } - subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, - start, len, afs_upload_to_server_worker); - if (subreq) - netfs_queue_write_request(subreq); + switch (subreq->error) { + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_get_writeback_key(wreq); + if (!wreq->netfs_priv) + stream->failed = true; + break; + } } /* diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index 8ca868164507..cc5f143d21a3 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -88,7 +88,7 @@ union afs_xdr_dir_block { struct { struct afs_xdr_dir_hdr hdr; - u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS]; + u8 alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR]; __be16 hashtable[AFS_DIR_HASHTBL_SIZE]; } meta; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index f521e66d3bf6..257af259c04a 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -352,18 +352,19 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call) static int yfs_deliver_fs_fetch_data64(struct afs_call *call) { struct afs_operation *op = call->op; + struct netfs_io_subrequest *subreq = op->fetch.subreq; struct afs_vnode_param *vp = &op->file[0]; - struct afs_read *req = op->fetch.req; const __be32 *bp; + size_t count_before; int ret; _enter("{%u,%zu, %zu/%llu}", call->unmarshall, call->iov_len, iov_iter_count(call->iter), - req->actual_len); + call->remaining); switch (call->unmarshall) { case 0: - req->actual_len = 0; + call->remaining = 0; afs_extract_to_tmp64(call); call->unmarshall++; fallthrough; @@ -378,38 +379,39 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) if (ret < 0) return ret; - req->actual_len = be64_to_cpu(call->tmp64); - _debug("DATA length: %llu", req->actual_len); + call->remaining = be64_to_cpu(call->tmp64); + _debug("DATA length: %llu", call->remaining); - if (req->actual_len == 0) + if (call->remaining == 0) goto no_more_data; - call->iter = req->iter; - call->iov_len = min(req->actual_len, req->len); + call->iter = &subreq->io_iter; + call->iov_len = min(call->remaining, subreq->len - subreq->transferred); call->unmarshall++; fallthrough; /* extract the returned data */ case 2: - _debug("extract data %zu/%llu", - iov_iter_count(call->iter), req->actual_len); + count_before = call->iov_len; + _debug("extract data %zu/%llu", count_before, call->remaining); ret = afs_extract_data(call, true); + subreq->transferred += count_before - call->iov_len; if (ret < 0) return ret; call->iter = &call->def_iter; - if (req->actual_len <= req->len) + if (call->remaining) goto no_more_data; /* Discard any excess data the server gave us */ - afs_extract_discard(call, req->actual_len - req->len); + afs_extract_discard(call, call->remaining); call->unmarshall = 3; fallthrough; case 3: _debug("extract discard %zu/%llu", - iov_iter_count(call->iter), req->actual_len - req->len); + iov_iter_count(call->iter), call->remaining); ret = afs_extract_data(call, true); if (ret < 0) @@ -434,8 +436,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) xdr_decode_YFSCallBack(&bp, call, &vp->scb); xdr_decode_YFSVolSync(&bp, &op->volsync); - req->data_version = vp->scb.status.data_version; - req->file_size = vp->scb.status.size; + if (subreq->start + subreq->transferred >= vp->scb.status.size) + __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); call->unmarshall++; fallthrough; @@ -454,7 +456,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) static const struct afs_call_type yfs_RXYFSFetchData64 = { .name = "YFS.FetchData64", .op = yfs_FS_FetchData64, + .async_rx = afs_fetch_data_async_rx, .deliver = yfs_deliver_fs_fetch_data64, + .immediate_cancel = afs_fetch_data_immediate_cancel, .destructor = afs_flat_call_destructor, }; @@ -463,14 +467,15 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = { */ void yfs_fs_fetch_data(struct afs_operation *op) { + struct netfs_io_subrequest *subreq = op->fetch.subreq; struct afs_vnode_param *vp = &op->file[0]; - struct afs_read *req = op->fetch.req; struct afs_call *call; __be32 *bp; - _enter(",%x,{%llx:%llu},%llx,%llx", + _enter(",%x,{%llx:%llu},%llx,%zx", key_serial(op->key), vp->fid.vid, vp->fid.vnode, - req->pos, req->len); + subreq->start + subreq->transferred, + subreq->len - subreq->transferred); call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64, sizeof(__be32) * 2 + @@ -482,15 +487,16 @@ void yfs_fs_fetch_data(struct afs_operation *op) if (!call) return afs_op_nomem(op); - req->call_debug_id = call->debug_id; + if (op->flags & AFS_OPERATION_ASYNC) + call->async = true; /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSFETCHDATA64); bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &vp->fid); - bp = xdr_encode_u64(bp, req->pos); - bp = xdr_encode_u64(bp, req->len); + bp = xdr_encode_u64(bp, subreq->start + subreq->transferred); + bp = xdr_encode_u64(bp, subreq->len - subreq->transferred); yfs_check_req(call, bp); call->fid = vp->fid; @@ -661,8 +667,9 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call) static void yfs_done_fs_remove_file2(struct afs_call *call) { if (call->error == -ECONNABORTED && - call->abort_code == RX_INVALID_OPERATION) { - set_bit(AFS_SERVER_FL_NO_RM2, &call->server->flags); + (call->abort_code == RX_INVALID_OPERATION || + call->abort_code == RXGEN_OPCODE)) { + set_bit(AFS_SERVER_FL_NO_RM2, &call->op->server->flags); call->op->flags |= AFS_OPERATION_DOWNGRADE; } } |