From 3f4aa981816368fe6b1d13c2bfbe76df9687e787 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 13 Jun 2020 00:03:48 +0100
Subject: afs: Fix EOF corruption

When doing a partial writeback, afs_write_back_from_locked_page() may
generate an FS.StoreData RPC request that writes out part of a file when a
file has been constructed from pieces by doing seek, write, seek, write,
... as is done by ld.

The FS.StoreData RPC is given the current i_size as the file length, but
the server basically ignores it unless the data length is 0 (in which case
it's just a truncate operation).  The revised file length returned in the
result of the RPC may then not reflect what we suggested - and this leads
to i_size getting moved backwards - which causes issues later.

Fix the client to take account of this by ignoring the returned file size
unless the data version number jumped unexpectedly - in which case we're
going to have to clear the pagecache and reload anyway.

This can be observed when doing a kernel build on an AFS mount.  The
following pair of commands produce the issue:

  ld -m elf_x86_64 -z max-page-size=0x200000 --emit-relocs \
      -T arch/x86/realmode/rm/realmode.lds \
      arch/x86/realmode/rm/header.o \
      arch/x86/realmode/rm/trampoline_64.o \
      arch/x86/realmode/rm/stack.o \
      arch/x86/realmode/rm/reboot.o \
      -o arch/x86/realmode/rm/realmode.elf
  arch/x86/tools/relocs --realmode \
      arch/x86/realmode/rm/realmode.elf \
      >arch/x86/realmode/rm/realmode.relocs

This results in the latter giving:

	Cannot read ELF section headers 0/18: Success

as the realmode.elf file got corrupted.

The sequence of events can also be driven with:

	xfs_io -t -f \
		-c "pwrite -S 0x58 0 0x58" \
		-c "pwrite -S 0x59 10000 1000" \
		-c "close" \
		/afs/example.com/scratch/a

Fixes: 31143d5d515e ("AFS: implement basic file write support")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'fs/afs/inode.c')

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index cd0a0060950b..8d10bfb392d1 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -168,6 +168,7 @@ static void afs_apply_status(struct afs_operation *op,
 	struct timespec64 t;
 	umode_t mode;
 	bool data_changed = false;
+	bool change_size = false;
 
 	_enter("{%llx:%llu.%u} %s",
 	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
@@ -226,6 +227,7 @@ static void afs_apply_status(struct afs_operation *op,
 		} else {
 			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
 		}
+		change_size = true;
 	} else if (vnode->status.type == AFS_FTYPE_DIR) {
 		/* Expected directory change is handled elsewhere so
 		 * that we can locally edit the directory and save on a
@@ -233,11 +235,19 @@ static void afs_apply_status(struct afs_operation *op,
 		 */
 		if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
 			data_changed = false;
+		change_size = true;
 	}
 
 	if (data_changed) {
 		inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
-		afs_set_i_size(vnode, status->size);
+
+		/* Only update the size if the data version jumped.  If the
+		 * file is being modified locally, then we might have our own
+		 * idea of what the size should be that's not the same as
+		 * what's on the server.
+		 */
+		if (change_size)
+			afs_set_i_size(vnode, status->size);
 	}
 }
 
-- 
cgit 


From da8d07551275abb3a38fae2d16e02bc9cc7396b2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 13 Jun 2020 19:34:59 +0100
Subject: afs: Concoct ctimes

The in-kernel afs filesystem ignores ctime because the AFS fileserver
protocol doesn't support ctimes.  This, however, causes various xfstests to
fail.

Work around this by:

 (1) Setting ctime to attr->ia_ctime in afs_setattr().

 (2) Not ignoring ATTR_MTIME_SET, ATTR_TIMES_SET and ATTR_TOUCH settings.

 (3) Setting the ctime from the server mtime when on the target file when
     creating a hard link to it.

 (4) Setting the ctime on directories from their revised mtimes when
     renaming/moving a file.

Found by the generic/221 and generic/309 xfstests.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'fs/afs/inode.c')

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 8d10bfb392d1..e99705474dd1 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -165,6 +165,7 @@ static void afs_apply_status(struct afs_operation *op,
 {
 	struct afs_file_status *status = &vp->scb.status;
 	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->vfs_inode;
 	struct timespec64 t;
 	umode_t mode;
 	bool data_changed = false;
@@ -187,25 +188,25 @@ static void afs_apply_status(struct afs_operation *op,
 	}
 
 	if (status->nlink != vnode->status.nlink)
-		set_nlink(&vnode->vfs_inode, status->nlink);
+		set_nlink(inode, status->nlink);
 
 	if (status->owner != vnode->status.owner)
-		vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
+		inode->i_uid = make_kuid(&init_user_ns, status->owner);
 
 	if (status->group != vnode->status.group)
-		vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
+		inode->i_gid = make_kgid(&init_user_ns, status->group);
 
 	if (status->mode != vnode->status.mode) {
-		mode = vnode->vfs_inode.i_mode;
+		mode = inode->i_mode;
 		mode &= ~S_IALLUGO;
 		mode |= status->mode;
-		WRITE_ONCE(vnode->vfs_inode.i_mode, mode);
+		WRITE_ONCE(inode->i_mode, mode);
 	}
 
 	t = status->mtime_client;
-	vnode->vfs_inode.i_ctime = t;
-	vnode->vfs_inode.i_mtime = t;
-	vnode->vfs_inode.i_atime = t;
+	inode->i_mtime = t;
+	if (vp->update_ctime)
+		inode->i_ctime = op->ctime;
 
 	if (vnode->status.data_version != status->data_version)
 		data_changed = true;
@@ -239,15 +240,18 @@ static void afs_apply_status(struct afs_operation *op,
 	}
 
 	if (data_changed) {
-		inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+		inode_set_iversion_raw(inode, status->data_version);
 
 		/* Only update the size if the data version jumped.  If the
 		 * file is being modified locally, then we might have our own
 		 * idea of what the size should be that's not the same as
 		 * what's on the server.
 		 */
-		if (change_size)
+		if (change_size) {
 			afs_set_i_size(vnode, status->size);
+			inode->i_ctime = t;
+			inode->i_atime = t;
+		}
 	}
 }
 
@@ -817,7 +821,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 	       attr->ia_valid);
 
 	if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
-				ATTR_MTIME))) {
+				ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET |
+				ATTR_TOUCH))) {
 		_leave(" = 0 [unsupported]");
 		return 0;
 	}
@@ -837,6 +842,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (attr->ia_valid & ATTR_SIZE)
 		op->file[0].dv_delta = 1;
+	op->ctime = attr->ia_ctime;
+	op->file[0].update_ctime = 1;
 
 	op->ops = &afs_setattr_operation;
 	return afs_do_sync_operation(op);
-- 
cgit 


From 793fe82ee33aab1023cf023cd7d744af19a3dff9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Jun 2020 16:13:52 +0100
Subject: afs: Fix truncation issues and mmap writeback size

Fix the following issues:

 (1) Fix writeback to reduce the size of a store operation to i_size,
     effectively discarding the extra data.

     The problem comes when afs_page_mkwrite() records that a page is about
     to be modified by mmap().  It doesn't know what bits of the page are
     going to be modified, so it records the whole page as being dirty
     (this is stored in page->private as start and end offsets).

     Without this, the marshalling for the store to the server extends the
     size of the file to the end of the page (in afs_fs_store_data() and
     yfs_fs_store_data()).

 (2) Fix setattr to actually truncate the pagecache, thereby clearing
     the discarded part of a file.

 (3) Fix setattr to check that the new size is okay and to disable
     ATTR_SIZE if i_size wouldn't change.

 (4) Force i_size to be updated as the result of a truncate.

 (5) Don't truncate if ATTR_SIZE is not set.

 (6) Call pagecache_isize_extended() if the file was enlarged.

Note that truncate_set_size() isn't used because the setting of i_size is
done inside afs_vnode_commit_status() under the vnode->cb_lock.

Found with the generic/029 and generic/393 xfstests.

Fixes: 31143d5d515e ("AFS: implement basic file write support")
Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'fs/afs/inode.c')

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index e99705474dd1..70c925978d10 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -169,7 +169,7 @@ static void afs_apply_status(struct afs_operation *op,
 	struct timespec64 t;
 	umode_t mode;
 	bool data_changed = false;
-	bool change_size = false;
+	bool change_size = vp->set_size;
 
 	_enter("{%llx:%llu.%u} %s",
 	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
@@ -799,7 +799,15 @@ void afs_evict_inode(struct inode *inode)
 
 static void afs_setattr_success(struct afs_operation *op)
 {
+	struct inode *inode = &op->file[0].vnode->vfs_inode;
+
 	afs_vnode_commit_status(op, &op->file[0]);
+	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+		loff_t i_size = inode->i_size, size = op->setattr.attr->ia_size;
+		if (size > i_size)
+			pagecache_isize_extended(inode, i_size, size);
+		truncate_pagecache(inode, size);
+	}
 }
 
 static const struct afs_operation_ops afs_setattr_operation = {
@@ -815,6 +823,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct afs_operation *op;
 	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+	int ret;
 
 	_enter("{%llx:%llu},{n=%pd},%x",
 	       vnode->fid.vid, vnode->fid.vnode, dentry,
@@ -827,6 +836,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 		return 0;
 	}
 
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (!S_ISREG(vnode->vfs_inode.i_mode))
+			return -EISDIR;
+
+		ret = inode_newsize_ok(&vnode->vfs_inode, attr->ia_size);
+		if (ret)
+			return ret;
+
+		if (attr->ia_size == i_size_read(&vnode->vfs_inode))
+			attr->ia_valid &= ~ATTR_SIZE;
+	}
+
 	/* flush any dirty data outstanding on a regular file */
 	if (S_ISREG(vnode->vfs_inode.i_mode))
 		filemap_write_and_wait(vnode->vfs_inode.i_mapping);
@@ -840,8 +861,10 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 	afs_op_set_vnode(op, 0, vnode);
 	op->setattr.attr = attr;
 
-	if (attr->ia_valid & ATTR_SIZE)
+	if (attr->ia_valid & ATTR_SIZE) {
 		op->file[0].dv_delta = 1;
+		op->file[0].set_size = true;
+	}
 	op->ctime = attr->ia_ctime;
 	op->file[0].update_ctime = 1;
 
-- 
cgit 


From 728279a5a1fd9fa9fa268f807391c4d19ad2822c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Jun 2020 00:34:09 +0100
Subject: afs: Fix use of afs_check_for_remote_deletion()

afs_check_for_remote_deletion() checks to see if error ENOENT is returned
by the server in response to an operation and, if so, marks the primary
vnode as having been deleted as the FID is no longer valid.

However, it's being called from the operation success functions, where no
abort has happened - and if an inline abort is recorded, it's handled by
afs_vnode_commit_status().

Fix this by actually calling the operation aborted method if provided and
having that point to afs_check_for_remote_deletion().

Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/afs/inode.c')

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 70c925978d10..56e60d561f37 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -324,6 +324,7 @@ static const struct afs_operation_ops afs_fetch_status_operation = {
 	.issue_afs_rpc	= afs_fs_fetch_status,
 	.issue_yfs_rpc	= yfs_fs_fetch_status,
 	.success	= afs_fetch_status_success,
+	.aborted	= afs_check_for_remote_deletion,
 };
 
 /*
-- 
cgit 


From 7c295eec1e351003a8ca06c34f9e79336fa5b244 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Jun 2020 00:52:30 +0100
Subject: afs: afs_vnode_commit_status() doesn't need to check the RPC error

afs_vnode_commit_status() is only ever called if op->error is 0, so remove
the op->error checks from the function.

Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs/afs/inode.c')

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 56e60d561f37..d5d0ae7b2b1e 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -281,8 +281,6 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
 
 	_enter("");
 
-	ASSERTCMP(op->error, ==, 0);
-
 	write_seqlock(&vnode->cb_lock);
 
 	if (vp->scb.have_error) {
@@ -300,7 +298,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
 
 	write_sequnlock(&vnode->cb_lock);
 
-	if (op->error == 0 && vp->scb.have_status)
+	if (vp->scb.have_status)
 		afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
 }
 
-- 
cgit 


From b6489a49f7b71964e37978d6f89bbdbdb263f6f5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 15 Jun 2020 17:36:58 +0100
Subject: afs: Fix silly rename

Fix AFS's silly rename by the following means:

 (1) Set the destination directory in afs_do_silly_rename() so as to avoid
     misbehaviour and indicate that the directory data version will
     increment by 1 so as to avoid warnings about unexpected changes in the
     DV.  Also indicate that the ctime should be updated to avoid xfstest
     grumbling.

 (2) Note when the server indicates that a directory changed more than we
     expected (AFS_OPERATION_DIR_CONFLICT), indicating a conflict with a
     third party change, checking on successful completion of unlink and
     rename.

     The problem is that the FS.RemoveFile RPC op doesn't report the status
     of the unlinked file, though YFS.RemoveFile2 does.  This can be
     mitigated by the assumption that if the directory DV cranked by
     exactly 1, we can be sure we removed one link from the file; further,
     ordinarily in AFS, files cannot be hardlinked across directories, so
     if we reduce nlink to 0, the file is deleted.

     However, if the directory DV jumps by more than 1, we cannot know if a
     third party intervened by adding or removing a link on the file we
     just removed a link from.

     The same also goes for any vnode that is at the destination of the
     FS.Rename RPC op.

 (3) Make afs_vnode_commit_status() apply the nlink drop inside the cb_lock
     section along with the other attribute updates if ->op_unlinked is set
     on the descriptor for the appropriate vnode.

 (4) Issue a follow up status fetch to the unlinked file in the event of a
     third party conflict that makes it impossible for us to know if we
     actually deleted the file or not.

 (5) Provide a flag, AFS_VNODE_SILLY_DELETED, to make afs_getattr() lie to
     the user about the nlink of a silly deleted file so that it appears as
     0, not 1.

Found with the generic/035 and generic/084 xfstests.

Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs/afs/inode.c')

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d5d0ae7b2b1e..1d13d2e882ad 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -284,16 +284,25 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
 	write_seqlock(&vnode->cb_lock);
 
 	if (vp->scb.have_error) {
+		/* A YFS server will return this from RemoveFile2 and AFS and
+		 * YFS will return this from InlineBulkStatus.
+		 */
 		if (vp->scb.status.abort_code == VNOVNODE) {
 			set_bit(AFS_VNODE_DELETED, &vnode->flags);
 			clear_nlink(&vnode->vfs_inode);
 			__afs_break_callback(vnode, afs_cb_break_for_deleted);
+			op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
 		}
-	} else {
-		if (vp->scb.have_status)
-			afs_apply_status(op, vp);
+	} else if (vp->scb.have_status) {
+		afs_apply_status(op, vp);
 		if (vp->scb.have_cb)
 			afs_apply_callback(op, vp);
+	} else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+		drop_nlink(&vnode->vfs_inode);
+		if (vnode->vfs_inode.i_nlink == 0) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			__afs_break_callback(vnode, afs_cb_break_for_deleted);
+		}
 	}
 
 	write_sequnlock(&vnode->cb_lock);
@@ -304,7 +313,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
 
 static void afs_fetch_status_success(struct afs_operation *op)
 {
-	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
 	struct afs_vnode *vnode = vp->vnode;
 	int ret;
 
@@ -318,7 +327,7 @@ static void afs_fetch_status_success(struct afs_operation *op)
 	}
 }
 
-static const struct afs_operation_ops afs_fetch_status_operation = {
+const struct afs_operation_ops afs_fetch_status_operation = {
 	.issue_afs_rpc	= afs_fs_fetch_status,
 	.issue_yfs_rpc	= yfs_fs_fetch_status,
 	.success	= afs_fetch_status_success,
@@ -729,6 +738,9 @@ int afs_getattr(const struct path *path, struct kstat *stat,
 	do {
 		read_seqbegin_or_lock(&vnode->cb_lock, &seq);
 		generic_fillattr(inode, stat);
+		if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
+		    stat->nlink > 0)
+			stat->nlink -= 1;
 	} while (need_seqretry(&vnode->cb_lock, seq));
 
 	done_seqretry(&vnode->cb_lock, seq);
-- 
cgit