1 files changed, 662 insertions, 338 deletions
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1a4ce07fb406..dde1857fcabb 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -23,47 +23,186 @@
 #include <linux/namei.h>
 #include <linux/iversion.h>
 #include "internal.h"
+#include "afs_fs.h"
+
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
+{
+	size_t len = strlen(op->create.symlink) + 1;	/* Includes the NUL */
+	size_t bufsize = 0;
+	char *dst;
+
+	if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &bufsize, len,
+				      mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
+		return;	/* Best effort: AFS_VNODE_DIR_READ just stays clear */
+
+	vnode->directory_size = bufsize;
+	dst = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+	memcpy(dst, op->create.symlink, len);
+	kunmap_local(dst);
+	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+}
+
+static void afs_put_link(void *arg)
+{
+	struct folio *link_folio = virt_to_folio(arg);
+
+	kunmap_local(arg);	/* Undo the mapping made in afs_get_link() */
+	folio_put(link_folio);	/* ...and drop the ref it took */
+}
+
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+			 struct delayed_call *callback)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct folio *folio;
+	char *content;
+	ssize_t ret;
+
+	if (!dentry) {
+		/* RCU pathwalk: use the copy already held or bail with -ECHILD. */
+		if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
+			return ERR_PTR(-ECHILD);
+		goto good;
+	}
+
+	if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+		goto fetch;	/* Nothing read in yet, so nothing to revalidate */
+
+	ret = afs_validate(vnode, NULL);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+	    test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+		goto good;	/* Held copy passed validation and wasn't zapped */
+
+fetch:
+	ret = afs_read_single(vnode, NULL);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+
+good:
+	folio = folioq_folio(vnode->directory, 0);
+	folio_get(folio);	/* Ref and mapping are released by afs_put_link() */
+	content = kmap_local_folio(folio, 0);
+	set_delayed_call(callback, afs_put_link, content);
+	return content;
+}
+
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+	DEFINE_DELAYED_CALL(cleanup);
+	const char *target = afs_get_link(dentry, d_inode(dentry), &cleanup);
+	int ret;
+
+	if (IS_ERR(target)) {
+		ret = PTR_ERR(target);
+	} else {
+		/* Copy at most buflen bytes; the NUL terminator is not included. */
+		ret = umin(strlen(target), buflen);
+		if (copy_to_user(buffer, target, ret))
+			ret = -EFAULT;
+	}
+
+	do_delayed_call(&cleanup);
+	return ret;
+}
 
 static const struct inode_operations afs_symlink_inode_operations = {
-	.get_link	= page_get_link,
-	.listxattr	= afs_listxattr,
+	.get_link	= afs_get_link,
+	.readlink	= afs_readlink,
 };
 
+static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
+{
+	/* Bit 0 latches once the first stack trace has been emitted. */
+	static unsigned long dumped;
+	const struct afs_file_status *st = &vnode->status;
+	const struct afs_fid *fid = &vnode->fid;
+
+	/* Report the offending status record and the vnode it belongs to. */
+	pr_warn("kAFS: AFS vnode with undefined type %u\n", st->type);
+	pr_warn("kAFS: A=%d m=%o s=%llx v=%llx\n",
+		st->abort_code, st->mode, st->size, st->data_version);
+	pr_warn("kAFS: vnode %llx:%llx:%x\n", fid->vid, fid->vnode, fid->unique);
+
+	/* Name the directory too when the caller supplied one. */
+	if (parent_vnode) {
+		fid = &parent_vnode->fid;
+		pr_warn("kAFS: dir %llx:%llx:%x\n", fid->vid, fid->vnode, fid->unique);
+	}
+
+	/* Only the first occurrence gets a backtrace; later ones just log. */
+	if (!test_and_set_bit(0, &dumped))
+		dump_stack();
+}
+
+/*
+ * Initialise the vnode's embedded netfs context with the AFS request ops.
+ */
+static void afs_set_netfs_context(struct afs_vnode *vnode)
+{
+	netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
+}
+
 /*
  * Initialise an inode from the vnode status.
  */
-static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
+static int afs_inode_init_from_status(struct afs_operation *op,
+				      struct afs_vnode_param *vp,
+				      struct afs_vnode *vnode)
 {
+	struct afs_file_status *status = &vp->scb.status;
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
+	struct timespec64 t;
+
+	_enter("{%llx:%llu.%u} %s",
+	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
+	       op->type ? op->type->name : "???");
 
 	_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
-	       vnode->status.type,
-	       vnode->status.nlink,
-	       (unsigned long long) vnode->status.size,
-	       vnode->status.data_version,
-	       vnode->status.mode);
+	       status->type,
+	       status->nlink,
+	       (unsigned long long) status->size,
+	       status->data_version,
+	       status->mode);
+
+	write_seqlock(&vnode->cb_lock);
 
-	read_seqlock_excl(&vnode->cb_lock);
+	vnode->cb_v_check = op->cb_v_break;
+	vnode->status = *status;
 
-	afs_update_inode_from_status(vnode, &vnode->status, NULL,
-				     AFS_VNODE_NOT_YET_SET);
+	t = status->mtime_client;
+	inode_set_ctime_to_ts(inode, t);
+	inode_set_mtime_to_ts(inode, t);
+	inode_set_atime_to_ts(inode, t);
+	inode->i_flags |= S_NOATIME;
+	inode->i_uid = make_kuid(&init_user_ns, status->owner);
+	inode->i_gid = make_kgid(&init_user_ns, status->group);
+	set_nlink(&vnode->netfs.inode, status->nlink);
 
-	switch (vnode->status.type) {
+	switch (status->type) {
 	case AFS_FTYPE_FILE:
-		inode->i_mode	= S_IFREG | vnode->status.mode;
+		inode->i_mode	= S_IFREG | (status->mode & S_IALLUGO);
 		inode->i_op	= &afs_file_inode_operations;
 		inode->i_fop	= &afs_file_operations;
-		inode->i_mapping->a_ops	= &afs_fs_aops;
+		inode->i_mapping->a_ops	= &afs_file_aops;
+		mapping_set_large_folios(inode->i_mapping);
 		break;
 	case AFS_FTYPE_DIR:
-		inode->i_mode	= S_IFDIR | vnode->status.mode;
+		inode->i_mode	= S_IFDIR |  (status->mode & S_IALLUGO);
 		inode->i_op	= &afs_dir_inode_operations;
 		inode->i_fop	= &afs_dir_file_operations;
 		inode->i_mapping->a_ops	= &afs_dir_aops;
+		__set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
+		/* Assume locally cached directory data will be valid. */
+		__set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
 		break;
 	case AFS_FTYPE_SYMLINK:
 		/* Symlinks with a mode of 0644 are actually mountpoints. */
-		if ((vnode->status.mode & 0777) == 0644) {
+		if ((status->mode & 0777) == 0644) {
 			inode->i_flags |= S_AUTOMOUNT;
 
 			set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
@@ -71,75 +210,291 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
 			inode->i_mode	= S_IFDIR | 0555;
 			inode->i_op	= &afs_mntpt_inode_operations;
 			inode->i_fop	= &afs_mntpt_file_operations;
-			inode->i_mapping->a_ops	= &afs_fs_aops;
 		} else {
-			inode->i_mode	= S_IFLNK | vnode->status.mode;
+			inode->i_mode	= S_IFLNK | status->mode;
 			inode->i_op	= &afs_symlink_inode_operations;
-			inode->i_mapping->a_ops	= &afs_fs_aops;
 		}
+		inode->i_mapping->a_ops	= &afs_dir_aops;
 		inode_nohighmem(inode);
+		mapping_set_release_always(inode->i_mapping);
 		break;
 	default:
-		printk("kAFS: AFS vnode with undefined type\n");
-		read_sequnlock_excl(&vnode->cb_lock);
-		return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
+		dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
+		write_sequnlock(&vnode->cb_lock);
+		return afs_protocol_error(NULL, afs_eproto_file_type);
 	}
 
-	inode->i_blocks		= 0;
-	vnode->invalid_before	= vnode->status.data_version;
+	afs_set_i_size(vnode, status->size);
+	afs_set_netfs_context(vnode);
 
-	read_sequnlock_excl(&vnode->cb_lock);
+	vnode->invalid_before	= status->data_version;
+	trace_afs_set_dv(vnode, status->data_version);
+	inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
+
+	if (!vp->scb.have_cb) {
+		/* it's a symlink we just created (the fileserver
+		 * didn't give us a callback) */
+		afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink);
+	} else {
+		vnode->cb_server = op->server;
+		afs_set_cb_promise(vnode, vp->scb.callback.expires_at,
+				   afs_cb_promise_set_new_inode);
+	}
+
+	write_sequnlock(&vnode->cb_lock);
 	return 0;
 }
 
 /*
- * Fetch file status from the volume.
+ * Update the core inode struct from a returned status record.
  */
-int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
+static void afs_apply_status(struct afs_operation *op,
+			     struct afs_vnode_param *vp)
+{
+	struct afs_file_status *status = &vp->scb.status;
+	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->netfs.inode;
+	struct timespec64 t;
+	umode_t mode;
+	bool unexpected_jump = false;
+	bool data_changed = false;
+	bool change_size = vp->set_size;
+
+	_enter("{%llx:%llu.%u} %s",
+	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
+	       op->type ? op->type->name : "???");
+
+	BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags));
+
+	if (status->type != vnode->status.type) {
+		pr_warn("Vnode %llx:%llx:%x changed type %u to %u\n",
+			vnode->fid.vid,
+			vnode->fid.vnode,
+			vnode->fid.unique,
+			vnode->status.type, status->type);	/* cached type, then new type */
+		afs_protocol_error(NULL, afs_eproto_bad_status);
+		return;
+	}
+
+	if (status->nlink != vnode->status.nlink)
+		set_nlink(inode, status->nlink);
+
+	if (status->owner != vnode->status.owner)
+		inode->i_uid = make_kuid(&init_user_ns, status->owner);
+
+	if (status->group != vnode->status.group)
+		inode->i_gid = make_kgid(&init_user_ns, status->group);
+
+	if (status->mode != vnode->status.mode) {
+		mode = inode->i_mode;
+		mode &= ~S_IALLUGO;
+		mode |= status->mode & S_IALLUGO;
+		WRITE_ONCE(inode->i_mode, mode);
+	}
+
+	t = status->mtime_client;
+	inode_set_mtime_to_ts(inode, t);
+	if (vp->update_ctime)
+		inode_set_ctime_to_ts(inode, op->ctime);
+
+	if (vnode->status.data_version != status->data_version) {
+		trace_afs_set_dv(vnode, status->data_version);
+		data_changed = true;
+	}
+
+	vnode->status = *status;	/* vnode->status is the new record from here on */
+
+	if (vp->dv_before + vp->dv_delta != status->data_version) {
+		trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta,
+				      status->data_version);
+
+		if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
+		    atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
+			pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
+				vnode->fid.vid, vnode->fid.vnode,
+				(unsigned long long)vp->dv_before + vp->dv_delta,
+				(unsigned long long)status->data_version,
+				op->type ? op->type->name : "???",
+				op->debug_id);
+
+		vnode->invalid_before = status->data_version;
+		if (vnode->status.type == AFS_FTYPE_DIR)
+			afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch);
+		else
+			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+		change_size = true;
+		data_changed = true;
+		unexpected_jump = true;
+	} else if (vnode->status.type == AFS_FTYPE_DIR) {
+		/* Expected directory change is handled elsewhere so
+		 * that we can locally edit the directory and save on a
+		 * download.
+		 */
+		if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			data_changed = false;
+		change_size = true;
+	}
+
+	if (data_changed) {
+		inode_set_iversion_raw(inode, status->data_version);
+
+		/* Only update the size if the data version jumped.  If the
+		 * file is being modified locally, then we might have our own
+		 * idea of what the size should be that's not the same as
+		 * what's on the server.
+		 */
+		vnode->netfs.remote_i_size = status->size;
+		if (change_size || status->size > i_size_read(inode)) {
+			afs_set_i_size(vnode, status->size);
+			if (unexpected_jump)
+				vnode->netfs.zero_point = status->size;
+			inode_set_ctime_to_ts(inode, t);
+			inode_set_atime_to_ts(inode, t);
+		}
+		if (op->ops == &afs_fetch_data_operation)
+			op->fetch.subreq->rreq->i_size = status->size;
+	}
+}
+
+/*
+ * Apply a reply's callback promise unless afs_cb_is_broken() objects.
+ */
+static void afs_apply_callback(struct afs_operation *op,
+			       struct afs_vnode_param *vp)
+{
+	struct afs_vnode *vnode = vp->vnode;
+
+	if (afs_cb_is_broken(vp->cb_break_before, vnode))
+		return;
+
+	if (op->volume->type == AFSVL_RWVOL)
+		vnode->cb_server = op->server;
+	afs_set_cb_promise(vnode, vp->scb.callback.expires_at, afs_cb_promise_set_apply_cb);
+}
+
+/*
+ * Apply the received status and callback to an inode within a single write
+ * section of vnode->cb_lock to avoid races with afs_validate().
+ */
+void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *vp)
+{
+	struct afs_vnode *vnode = vp->vnode;
+
+	_enter("");
+
+	write_seqlock(&vnode->cb_lock);
+
+	if (vp->scb.have_error) {
+		/* A YFS server will return this from RemoveFile2 and AFS and
+		 * YFS will return this from InlineBulkStatus.
+		 */
+		if (vp->scb.status.abort_code == VNOVNODE) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			clear_nlink(&vnode->netfs.inode);
+			__afs_break_callback(vnode, afs_cb_break_for_deleted);
+			op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
+		}
+	} else if (vp->scb.have_status) {
+		if (vp->speculative &&
+		    (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) ||
+		     vp->dv_before != vnode->status.data_version))
+			/* Ignore the result of a speculative bulk status fetch
+			 * if it splits around a modification op, thereby
+			 * appearing to regress the data version.
+			 */
+			goto out;
+		afs_apply_status(op, vp);
+		if (vp->scb.have_cb)
+			afs_apply_callback(op, vp);
+	} else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+		drop_nlink(&vnode->netfs.inode);	/* No status came back; adjust locally */
+		if (vnode->netfs.inode.i_nlink == 0) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			__afs_break_callback(vnode, afs_cb_break_for_deleted);
+		}
+	}
+
+out:
+	write_sequnlock(&vnode->cb_lock);
+
+	if (vp->scb.have_status)	/* Permit caching is done outside cb_lock */
+		afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
+}
+
+static void afs_fetch_status_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+	struct afs_vnode *vnode = vp->vnode;
+	int err;
+
+	if (!(inode_state_read_once(&vnode->netfs.inode) & I_NEW)) {
+		afs_vnode_commit_status(op, vp);	/* Inode already set up */
+	} else {
+		err = afs_inode_init_from_status(op, vp, vnode);
+		afs_op_set_error(op, err);
+		if (!err)
+			afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
+	}
+}
+
+const struct afs_operation_ops afs_fetch_status_operation = {
+	.issue_afs_rpc	= afs_fs_fetch_status,
+	.issue_yfs_rpc	= yfs_fs_fetch_status,
+	.success	= afs_fetch_status_success,	/* Initialises or updates the inode */
+	.aborted	= afs_check_for_remote_deletion,
+};
+
+/*
+ * Fetch file status from the volume.
+ */
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new,
+		     afs_access_t *_caller_access)
+{
+	struct afs_operation *op;
+
 	_enter("%s,{%llx:%llu.%u,S=%lx}",
 	       vnode->volume->name,
 	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
 	       vnode->flags);
 
-	ret = -ERESTARTSYS;
-	if (afs_begin_vnode_operation(&fc, vnode, key)) {
-		while (afs_select_fileserver(&fc)) {
-			fc.cb_break = afs_calc_vnode_cb_break(vnode);
-			afs_fs_fetch_file_status(&fc, NULL, new_inode);
-		}
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
 
-		afs_check_for_remote_deletion(&fc, fc.vnode);
-		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
-		ret = afs_end_vnode_operation(&fc);
-	}
+	afs_op_set_vnode(op, 0, vnode);
 
-	_leave(" = %d", ret);
-	return ret;
+	op->nr_files	= 1;
+	op->ops		= &afs_fetch_status_operation;
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	if (_caller_access)
+		*_caller_access = op->file[0].scb.status.caller_access;
+	return afs_put_operation(op);
 }
 
 /*
- * iget5() comparator
+ * ilookup() comparator
  */
-int afs_iget5_test(struct inode *inode, void *opaque)
+int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque)
 {
-	struct afs_iget_data *data = opaque;
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_fid *fid = opaque;
 
-	return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
+	return (fid->vnode == vnode->fid.vnode &&
+		fid->vnode_hi == vnode->fid.vnode_hi &&
+		fid->unique == vnode->fid.unique);
 }
 
 /*
- * iget5() comparator for inode created by autocell operations
- *
- * These pseudo inodes don't match anything.
+ * iget5() comparator
  */
-static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque)
+static int afs_iget5_test(struct inode *inode, void *opaque)
 {
-	return 0;
+	struct afs_vnode_param *vp = opaque;
+	//struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	return afs_ilookup5_test_by_fid(inode, &vp->fid);
 }
 
 /*
@@ -147,189 +502,89 @@ static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque)
  */
 static int afs_iget5_set(struct inode *inode, void *opaque)
 {
-	struct afs_iget_data *data = opaque;
+	struct afs_vnode_param *vp = opaque;
+	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	vnode->fid = data->fid;
-	vnode->volume = data->volume;
+	vnode->volume		= as->volume;
+	vnode->fid		= vp->fid;
 
 	/* YFS supports 96-bit vnode IDs, but Linux only supports
 	 * 64-bit inode numbers.
 	 */
-	inode->i_ino = data->fid.vnode;
-	inode->i_generation = data->fid.unique;
+	inode->i_ino		= vnode->fid.vnode;
+	inode->i_generation	= vnode->fid.unique;
 	return 0;
 }
 
 /*
- * Create an inode for a dynamic root directory or an autocell dynamic
- * automount dir.
- */
-struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
-{
-	struct afs_iget_data data;
-	struct afs_super_info *as;
-	struct afs_vnode *vnode;
-	struct inode *inode;
-	static atomic_t afs_autocell_ino;
-
-	_enter("");
-
-	as = sb->s_fs_info;
-	if (as->volume) {
-		data.volume = as->volume;
-		data.fid.vid = as->volume->vid;
-	}
-	if (root) {
-		data.fid.vnode = 1;
-		data.fid.unique = 1;
-	} else {
-		data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
-		data.fid.unique = 0;
-	}
-
-	inode = iget5_locked(sb, data.fid.vnode,
-			     afs_iget5_pseudo_dir_test, afs_iget5_set,
-			     &data);
-	if (!inode) {
-		_leave(" = -ENOMEM");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
-	       inode, inode->i_ino, data.fid.vid, data.fid.vnode,
-	       data.fid.unique);
-
-	vnode = AFS_FS_I(inode);
-
-	/* there shouldn't be an existing inode */
-	BUG_ON(!(inode->i_state & I_NEW));
-
-	inode->i_size		= 0;
-	inode->i_mode		= S_IFDIR | S_IRUGO | S_IXUGO;
-	if (root) {
-		inode->i_op	= &afs_dynroot_inode_operations;
-		inode->i_fop	= &afs_dynroot_file_operations;
-	} else {
-		inode->i_op	= &afs_autocell_inode_operations;
-	}
-	set_nlink(inode, 2);
-	inode->i_uid		= GLOBAL_ROOT_UID;
-	inode->i_gid		= GLOBAL_ROOT_GID;
-	inode->i_ctime.tv_sec	= get_seconds();
-	inode->i_ctime.tv_nsec	= 0;
-	inode->i_atime		= inode->i_mtime = inode->i_ctime;
-	inode->i_blocks		= 0;
-	inode_set_iversion_raw(inode, 0);
-	inode->i_generation	= 0;
-
-	set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
-	if (!root) {
-		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-		inode->i_flags |= S_AUTOMOUNT;
-	}
-
-	inode->i_flags |= S_NOATIME;
-	unlock_new_inode(inode);
-	_leave(" = %p", inode);
-	return inode;
-}
-
-/*
  * Get a cache cookie for an inode.
  */
 static void afs_get_inode_cache(struct afs_vnode *vnode)
 {
 #ifdef CONFIG_AFS_FSCACHE
 	struct {
-		u32 vnode_id;
-		u32 unique;
-		u32 vnode_id_ext[2];	/* Allow for a 96-bit key */
+		__be32 vnode_id;
+		__be32 unique;
+		__be32 vnode_id_ext[2];	/* Allow for a 96-bit key */
 	} __packed key;
 	struct afs_vnode_cache_aux aux;
 
-	if (vnode->status.type == AFS_FTYPE_DIR) {
-		vnode->cache = NULL;
+	if (vnode->status.type != AFS_FTYPE_FILE &&
+	    vnode->status.type != AFS_FTYPE_DIR &&
+	    vnode->status.type != AFS_FTYPE_SYMLINK) {
+		vnode->netfs.cache = NULL;
 		return;
 	}
 
-	key.vnode_id		= vnode->fid.vnode;
-	key.unique		= vnode->fid.unique;
-	key.vnode_id_ext[0]	= vnode->fid.vnode >> 32;
-	key.vnode_id_ext[1]	= vnode->fid.vnode_hi;
-	aux.data_version	= vnode->status.data_version;
-
-	vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
-					      &afs_vnode_cache_index_def,
-					      &key, sizeof(key),
-					      &aux, sizeof(aux),
-					      vnode, vnode->status.size, true);
+	key.vnode_id		= htonl(vnode->fid.vnode);
+	key.unique		= htonl(vnode->fid.unique);
+	key.vnode_id_ext[0]	= htonl(vnode->fid.vnode >> 32);
+	key.vnode_id_ext[1]	= htonl(vnode->fid.vnode_hi);
+	afs_set_cache_aux(vnode, &aux);
+
+	afs_vnode_set_cache(vnode,
+			    fscache_acquire_cookie(
+				    vnode->volume->cache,
+				    vnode->status.type == AFS_FTYPE_FILE ?
+				    0 : FSCACHE_ADV_SINGLE_CHUNK,
+				    &key, sizeof(key),
+				    &aux, sizeof(aux),
+				    i_size_read(&vnode->netfs.inode)));
 #endif
 }
 
 /*
  * inode retrieval
  */
-struct inode *afs_iget(struct super_block *sb, struct key *key,
-		       struct afs_fid *fid, struct afs_file_status *status,
-		       struct afs_callback *cb, struct afs_cb_interest *cbi)
+struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp)
 {
-	struct afs_iget_data data = { .fid = *fid };
-	struct afs_super_info *as;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct super_block *sb = dvp->vnode->netfs.inode.i_sb;
 	struct afs_vnode *vnode;
 	struct inode *inode;
 	int ret;
 
-	_enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
-
-	as = sb->s_fs_info;
-	data.volume = as->volume;
+	_enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique);
 
-	inode = iget5_locked(sb, fid->vnode, afs_iget5_test, afs_iget5_set,
-			     &data);
+	inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp);
 	if (!inode) {
 		_leave(" = -ENOMEM");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	_debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
-	       inode, fid->vid, fid->vnode, fid->unique);
-
 	vnode = AFS_FS_I(inode);
 
+	_debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
+	       inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+
 	/* deal with an existing inode */
-	if (!(inode->i_state & I_NEW)) {
+	if (!(inode_state_read_once(inode) & I_NEW)) {
 		_leave(" = %p", inode);
 		return inode;
 	}
 
-	if (!status) {
-		/* it's a remotely extant inode */
-		ret = afs_fetch_status(vnode, key, true);
-		if (ret < 0)
-			goto bad_inode;
-	} else {
-		/* it's an inode we just created */
-		memcpy(&vnode->status, status, sizeof(vnode->status));
-
-		if (!cb) {
-			/* it's a symlink we just created (the fileserver
-			 * didn't give us a callback) */
-			vnode->cb_version = 0;
-			vnode->cb_type = 0;
-			vnode->cb_expires_at = ktime_get();
-		} else {
-			vnode->cb_version = cb->version;
-			vnode->cb_type = cb->type;
-			vnode->cb_expires_at = cb->expires_at;
-			vnode->cb_interest = afs_get_cb_interest(cbi);
-			set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-		}
-
-		vnode->cb_expires_at += ktime_get_real_seconds();
-	}
-
-	ret = afs_inode_init_from_status(vnode, key);
+	ret = afs_inode_init_from_status(op, vp, vnode);
 	if (ret < 0)
 		goto bad_inode;
 
@@ -337,9 +592,8 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 
 	/* success */
 	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
-	inode->i_flags |= S_NOATIME;
 	unlock_new_inode(inode);
-	_leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
+	_leave(" = %p", inode);
 	return inode;
 
 	/* failure */
@@ -349,141 +603,115 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
-/*
- * mark the data attached to an inode as obsolete due to a write on the server
- * - might also want to ditch all the outstanding writes and dirty pages
- */
-void afs_zap_data(struct afs_vnode *vnode)
+static int afs_iget5_set_root(struct inode *inode, void *opaque)
 {
-	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_invalidate(vnode->cache);
-#endif
+	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	/* nuke all the non-dirty pages that aren't locked, mapped or being
-	 * written back in a regular file and completely discard the pages in a
-	 * directory or symlink */
-	if (S_ISREG(vnode->vfs_inode.i_mode))
-		invalidate_remote_inode(&vnode->vfs_inode);
-	else
-		invalidate_inode_pages2(vnode->vfs_inode.i_mapping);
+	vnode->volume		= as->volume;
+	vnode->fid.vid		= as->volume->vid;
+	vnode->fid.vnode	= 1;
+	vnode->fid.unique	= 1;
+	inode->i_ino		= 1;
+	inode->i_generation	= 1;
+	return 0;
 }
 
 /*
- * validate a vnode/inode
- * - there are several things we need to check
- *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
- *     symlink)
- *   - parent dir metadata changed (security changes)
- *   - dentry data changed (write, truncate)
- *   - dentry metadata changed (security changes)
+ * Set up the root inode for a volume.  This is always vnode 1, unique 1 within
+ * the volume.
  */
-int afs_validate(struct afs_vnode *vnode, struct key *key)
+struct inode *afs_root_iget(struct super_block *sb, struct key *key)
 {
-	time64_t now = ktime_get_real_seconds();
-	bool valid;
+	struct afs_super_info *as = AFS_FS_S(sb);
+	struct afs_operation *op;
+	struct afs_vnode *vnode;
+	struct inode *inode;
 	int ret;
 
-	_enter("{v={%llx:%llu} fl=%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
-	       key_serial(key));
+	_enter(",{%llx},,", as->volume->vid);
 
-	/* Quickly check the callback state.  Ideally, we'd use read_seqbegin
-	 * here, but we have no way to pass the net namespace to the RCU
-	 * cleanup for the server record.
-	 */
-	read_seqlock_excl(&vnode->cb_lock);
-
-	if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-		if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
-		    vnode->cb_v_break != vnode->volume->cb_v_break) {
-			vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
-			vnode->cb_v_break = vnode->volume->cb_v_break;
-			valid = false;
-		} else if (vnode->status.type == AFS_FTYPE_DIR &&
-			   (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) ||
-			    vnode->cb_expires_at - 10 <= now)) {
-			valid = false;
-		} else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) ||
-			   vnode->cb_expires_at - 10 <= now) {
-			valid = false;
-		} else {
-			valid = true;
-		}
-	} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		valid = true;
-	} else {
-		vnode->cb_v_break = vnode->volume->cb_v_break;
-		valid = false;
+	inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
 	}
 
-	read_sequnlock_excl(&vnode->cb_lock);
+	_debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid);
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-		clear_nlink(&vnode->vfs_inode);
+	BUG_ON(!(inode_state_read_once(inode) & I_NEW));
 
-	if (valid)
-		goto valid;
-
-	down_write(&vnode->validate_lock);
+	vnode = AFS_FS_I(inode);
+	vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
+	afs_set_netfs_context(vnode);
 
-	/* if the promise has expired, we need to check the server again to get
-	 * a new promise - note that if the (parent) directory's metadata was
-	 * changed then the security may be different and we may no longer have
-	 * access */
-	if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-		_debug("not promised");
-		ret = afs_fetch_status(vnode, key, false);
-		if (ret < 0) {
-			if (ret == -ENOENT) {
-				set_bit(AFS_VNODE_DELETED, &vnode->flags);
-				ret = -ESTALE;
-			}
-			goto error_unlock;
-		}
-		_debug("new promise [fl=%lx]", vnode->flags);
+	op = afs_alloc_operation(key, as->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto error;
 	}
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_debug("file already deleted");
-		ret = -ESTALE;
-		goto error_unlock;
-	}
+	afs_op_set_vnode(op, 0, vnode);
 
-	/* if the vnode's data version number changed then its contents are
-	 * different */
-	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
-		afs_zap_data(vnode);
-	up_write(&vnode->validate_lock);
-valid:
-	_leave(" = 0");
-	return 0;
+	op->nr_files	= 1;
+	op->ops		= &afs_fetch_status_operation;
+	ret = afs_do_sync_operation(op);
+	if (ret < 0)
+		goto error;
 
-error_unlock:
-	up_write(&vnode->validate_lock);
-	_leave(" = %d", ret);
-	return ret;
+	afs_get_inode_cache(vnode);
+
+	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
+	unlock_new_inode(inode);
+	_leave(" = %p", inode);
+	return inode;
+
+error:
+	iget_failed(inode);
+	_leave(" = %d [bad]", ret);
+	return ERR_PTR(ret);
 }
 
 /*
  * read the attributes of an inode
  */
-int afs_getattr(const struct path *path, struct kstat *stat,
-		u32 request_mask, unsigned int query_flags)
+int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
+		struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
-	int seq = 0;
+	struct key *key;
+	int ret, seq;
 
 	_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
 
+	if (vnode->volume &&
+	    !(query_flags & AT_STATX_DONT_SYNC) &&
+	    atomic64_read(&vnode->cb_expires_at) == AFS_NO_CB_PROMISE) {
+		key = afs_request_key(vnode->volume->cell);
+		if (IS_ERR(key))
+			return PTR_ERR(key);
+		ret = afs_validate(vnode, key);
+		key_put(key);
+		if (ret < 0)
+			return ret;
+	}
+
 	do {
-		read_seqbegin_or_lock(&vnode->cb_lock, &seq);
-		generic_fillattr(inode, stat);
-	} while (need_seqretry(&vnode->cb_lock, seq));
+		seq = read_seqbegin(&vnode->cb_lock);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+		if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
+		    stat->nlink > 0)
+			stat->nlink -= 1;
+
+		/* Lie about the size of directories.  We maintain a locally
+		 * edited copy and may make different allocation decisions on
+		 * it, but we need to give userspace the server's size.
+		 */
+		if (S_ISDIR(inode->i_mode))
+			stat->size = vnode->netfs.remote_i_size;
+	} while (read_seqretry(&vnode->cb_lock, seq));
 
-	done_seqretry(&vnode->cb_lock, seq);
 	return 0;
 }
 
@@ -495,9 +723,9 @@ int afs_drop_inode(struct inode *inode)
 	_enter("");
 
 	if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags))
-		return generic_delete_inode(inode);
+		return inode_just_drop(inode);
 	else
-		return generic_drop_inode(inode);
+		return inode_generic_drop(inode);
 }
 
 /*
@@ -505,9 +733,9 @@ int afs_drop_inode(struct inode *inode)
  */
 void afs_evict_inode(struct inode *inode)
 {
-	struct afs_vnode *vnode;
-
-	vnode = AFS_FS_I(inode);
+	struct afs_vnode_cache_aux aux;
+	struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 
 	_enter("{%llx:%llu.%d}",
 	       vnode->fid.vid,
@@ -518,13 +746,26 @@ void afs_evict_inode(struct inode *inode)
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
+	if ((S_ISDIR(inode->i_mode) ||
+	     S_ISLNK(inode->i_mode)) &&
+	    (inode_state_read_once(inode) & I_DIRTY) &&
+	    !sbi->dyn_root) {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_ALL,
+			.for_sync = true,
+			.range_end = LLONG_MAX,
+		};
+
+		afs_single_writepages(inode->i_mapping, &wbc);
+	}
+
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
-	clear_inode(inode);
+	netfs_free_folioq_buffer(vnode->directory);
 
-	if (vnode->cb_interest) {
-		afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
-		vnode->cb_interest = NULL;
-	}
+	afs_set_cache_aux(vnode, &aux);
+	netfs_clear_inode_writeback(inode, &aux);
+	clear_inode(inode);
 
 	while (!list_empty(&vnode->wb_keys)) {
 		struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
@@ -533,73 +774,156 @@ void afs_evict_inode(struct inode *inode)
 		afs_put_wb_key(wbk);
 	}
 
-#ifdef CONFIG_AFS_FSCACHE
-	{
-		struct afs_vnode_cache_aux aux;
-
-		aux.data_version = vnode->status.data_version;
-		fscache_relinquish_cookie(vnode->cache, &aux,
-					  test_bit(AFS_VNODE_DELETED, &vnode->flags));
-		vnode->cache = NULL;
-	}
-#endif
+	fscache_relinquish_cookie(afs_vnode_cache(vnode),
+				  test_bit(AFS_VNODE_DELETED, &vnode->flags));
 
+	afs_prune_wb_keys(vnode);
 	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+	key_put(vnode->silly_key);
+	vnode->silly_key = NULL;
 	key_put(vnode->lock_key);
 	vnode->lock_key = NULL;
 	_leave("");
 }
 
+static void afs_setattr_success(struct afs_operation *op)
+{	/* Installed as the .success handler of afs_setattr_operation. */
+	struct afs_vnode_param *vp = &op->file[0];
+	struct inode *inode = &vp->vnode->netfs.inode;
+	loff_t old_i_size = i_size_read(inode);	/* sampled before the commit below */
+
+	op->setattr.old_i_size = old_i_size;	/* read by afs_setattr_edit_file() */
+	afs_vnode_commit_status(op, vp);
+	/* inode->i_size has now been changed. */
+
+	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+		loff_t size = op->setattr.attr->ia_size;
+		if (size > old_i_size)	/* only when the file grew */
+			pagecache_isize_extended(inode, old_i_size, size);
+	}
+}
+
+static void afs_setattr_edit_file(struct afs_operation *op)
+{	/* Installed as the .edit_dir handler of afs_setattr_operation. */
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->netfs.inode;
+
+	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+		loff_t size = op->setattr.attr->ia_size;
+		loff_t old = op->setattr.old_i_size;	/* saved by afs_setattr_success() */
+
+		/* Note: inode->i_size was updated by afs_apply_status() inside
+		 * the I/O and callback locks.
+		 */
+
+		if (size != old) {	/* resize pagecache, netfs and cache cookie */
+			truncate_pagecache(inode, size);
+			netfs_resize_file(&vnode->netfs, size, true);
+			fscache_resize_cookie(afs_vnode_cache(vnode), size);
+		}
+	}
+}
+
+static const struct afs_operation_ops afs_setattr_operation = {
+	.issue_afs_rpc	= afs_fs_setattr,
+	.issue_yfs_rpc	= yfs_fs_setattr,
+	.success	= afs_setattr_success,	/* commits status; saves old i_size */
+	.edit_dir	= afs_setattr_edit_file, /* resizes pagecache/netfs/fscache */
+};	/* Assigned to op->ops in afs_setattr(). */
+
 /*
  * set the attributes of an inode
  */
-int afs_setattr(struct dentry *dentry, struct iattr *attr)
+int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+		struct iattr *attr)	/* idmap is not referenced in this body */
 {
-	struct afs_fs_cursor fc;
+	const unsigned int supported =	/* ia_valid bits this function acts on */
+		ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
+		ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | ATTR_TOUCH;
+	struct afs_operation *op;
 	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
-	struct key *key;
+	struct inode *inode = &vnode->netfs.inode;
+	loff_t i_size;
 	int ret;
 
 	_enter("{%llx:%llu},{n=%pd},%x",
 	       vnode->fid.vid, vnode->fid.vnode, dentry,
 	       attr->ia_valid);
 
-	if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
-				ATTR_MTIME))) {
+	if (!(attr->ia_valid & supported)) {
 		_leave(" = 0 [unsupported]");
 		return 0;
 	}
 
-	/* flush any dirty data outstanding on a regular file */
-	if (S_ISREG(vnode->vfs_inode.i_mode))
-		filemap_write_and_wait(vnode->vfs_inode.i_mapping);
+	i_size = i_size_read(inode);
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (!S_ISREG(inode->i_mode))	/* size changes: regular files only */
+			return -EISDIR;
 
-	if (attr->ia_valid & ATTR_FILE) {
-		key = afs_file_key(attr->ia_file);
-	} else {
-		key = afs_request_key(vnode->volume->cell);
-		if (IS_ERR(key)) {
-			ret = PTR_ERR(key);
-			goto error;
-		}
+		ret = inode_newsize_ok(inode, attr->ia_size);
+		if (ret)
+			return ret;
+
+		if (attr->ia_size == i_size)
+			attr->ia_valid &= ~ATTR_SIZE;	/* no-op resize: drop it */
 	}
 
-	ret = -ERESTARTSYS;
-	if (afs_begin_vnode_operation(&fc, vnode, key)) {
-		while (afs_select_fileserver(&fc)) {
-			fc.cb_break = afs_calc_vnode_cb_break(vnode);
-			afs_fs_setattr(&fc, attr);
+	fscache_use_cookie(afs_vnode_cache(vnode), true);
+
+	/* Prevent any new writebacks from starting whilst we do this. */
+	down_write(&vnode->validate_lock);
+
+	if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) {
+		loff_t size = attr->ia_size;
+
+		/* Wait for any outstanding writes to the server between old and new EOF */
+		loff_t from = min(size, i_size);
+		loff_t to = max(size, i_size);
+		ret = filemap_fdatawait_range(inode->i_mapping, from, to);
+		if (ret < 0)
+			goto out_unlock;
+
+		/* Don't talk to the server if we're just shortening in-memory
+		 * writes that haven't gone to the server yet.
+		 */
+		if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
+		    attr->ia_size < i_size &&
+		    attr->ia_size > vnode->netfs.remote_i_size) {
+			truncate_setsize(inode, attr->ia_size);
+			netfs_resize_file(&vnode->netfs, size, false);
+			fscache_resize_cookie(afs_vnode_cache(vnode),
+					      attr->ia_size);
+			ret = 0;
+			goto out_unlock;
 		}
+	}
 
-		afs_check_for_remote_deletion(&fc, fc.vnode);
-		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
-		ret = afs_end_vnode_operation(&fc);
+	op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ?
+				  afs_file_key(attr->ia_file) : NULL),
+				 vnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto out_unlock;
 	}
 
-	if (!(attr->ia_valid & ATTR_FILE))
-		key_put(key);
+	afs_op_set_vnode(op, 0, vnode);
+	op->setattr.attr = attr;
 
-error:
+	if (attr->ia_valid & ATTR_SIZE) {
+		op->file[0].dv_delta = 1;
+		op->file[0].set_size = true;
+	}
+	op->ctime = attr->ia_ctime;
+	op->file[0].update_ctime = 1;
+	op->file[0].modification = true;
+
+	op->ops = &afs_setattr_operation;
+	ret = afs_do_sync_operation(op);
+
+out_unlock:
+	up_write(&vnode->validate_lock);	/* pairs with down_write() above */
+	fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
 	_leave(" = %d", ret);
 	return ret;
 }