From 0aec3847d044273733285dcff90afda89ad461d2 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 13 Mar 2024 11:08:41 +0000
Subject: afs: Revert "afs: Hide silly-rename files from userspace"

This reverts commit 57e9d49c54528c49b8bffe6d99d782ea051ea534.

This undoes the hiding of .__afsXXXX silly-rename files.  The problem
with hiding them is that rm can't then manually delete them.

This also reverts commit 5f7a07646655fb4108da527565dcdc80124b14c4 ("afs:
Fix endless loop in directory parsing") as that's a bugfix for the
above.

Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace")
Reported-by: Markus Suvanto
Link: https://lists.infradead.org/pipermail/linux-afs/2024-February/008102.html
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/3085695.1710328121@warthog.procyon.org.uk
Reviewed-by: Jeffrey E Altman
cc: Marc Dionne
cc: linux-afs@lists.infradead.org
Signed-off-by: Christian Brauner
---
 fs/afs/dir.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 8a67fc427e74..67afe68972d5 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -474,16 +474,6 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
 			continue;
 		}
 
-		/* Don't expose silly rename entries to userspace. */
-		if (nlen > 6 &&
-		    dire->u.name[0] == '.' &&
-		    ctx->actor != afs_lookup_filldir &&
-		    ctx->actor != afs_lookup_one_filldir &&
-		    memcmp(dire->u.name, ".__afs", 6) == 0) {
-			ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
-			continue;
-		}
-
 		/* found the next entry */
 		if (!dir_emit(ctx, dire->u.name, nlen,
 			      ntohl(dire->u.vnode),
--
cgit
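To make the userspace impact concrete, here is a minimal standalone sketch (plain userspace C, not kernel code) of the name test the reverted filter applied: any entry longer than six characters and beginning with ".__afs" was hidden from readdir(). Since rm -r relies on readdir() to enumerate a directory's contents, a hidden silly-rename file could never be listed or removed by hand. The directory scanned here is an assumption.

/*
 * Userspace sketch of the reverted filter's name test (not kernel code).
 * It flags the entries that the kernel used to hide -- the same entries
 * rm needs readdir() to report before it can unlink them.
 */
#include <dirent.h>
#include <stdio.h>
#include <string.h>

static int is_afs_silly_rename(const char *name)
{
	/* Same test as the removed kernel check: nlen > 6 and ".__afs" prefix. */
	return strlen(name) > 6 && memcmp(name, ".__afs", 6) == 0;
}

int main(void)
{
	DIR *dir = opendir(".");	/* assumed: an AFS-backed directory */
	struct dirent *de;

	if (!dir)
		return 1;
	while ((de = readdir(dir)) != NULL)
		printf("%s%s\n", de->d_name,
		       is_afs_silly_rename(de->d_name) ? "  [silly-rename]" : "");
	closedir(dir);
	return 0;
}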
From 83505bde45e347f1451d007b3ddd7f06cee4c269 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 13 Mar 2024 08:15:02 +0000
Subject: afs: Don't cache preferred address

In the AFS fileserver rotation algorithm, don't cache the preferred
address for the server as that will override the explicit preference if
a non-preferred address responds first.

Fixes: 495f2ae9e355 ("afs: Fix fileserver rotation")
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/20240313081505.3060173-2-dhowells@redhat.com
Reviewed-by: Marc Dionne
cc: Marc Dionne
cc: linux-afs@lists.infradead.org
Signed-off-by: Christian Brauner
---
 fs/afs/rotate.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 700a27bc8c25..ed04bd1eeae8 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -602,6 +602,8 @@ iterate_address:
 		goto wait_for_more_probe_results;
 
 	alist = op->estate->addresses;
+	best_prio = -1;
+	addr_index = 0;
 	for (i = 0; i < alist->nr_addrs; i++) {
 		if (alist->addrs[i].prio > best_prio) {
 			addr_index = i;
@@ -609,9 +611,7 @@ iterate_address:
 		}
 	}
 
-	addr_index = READ_ONCE(alist->preferred);
-	if (!test_bit(addr_index, &set))
-		addr_index = __ffs(set);
+	alist->preferred = addr_index;
 
 	op->addr_index = addr_index;
 	set_bit(addr_index, &op->addr_tried);
@@ -656,12 +656,6 @@ wait_for_more_probe_results:
 
 next_server:
 	trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
 	_debug("next");
-	ASSERT(op->estate);
-	alist = op->estate->addresses;
-	if (op->call_responded &&
-	    op->addr_index != READ_ONCE(alist->preferred) &&
-	    test_bit(alist->preferred, &op->addr_tried))
-		WRITE_ONCE(alist->preferred, op->addr_index);
 	op->estate = NULL;
 	goto pick_server;
@@ -690,14 +684,7 @@ no_more_servers:
 failed:
 	trace_afs_rotate(op, afs_rotate_trace_failed, 0);
 	op->flags |= AFS_OPERATION_STOP;
-	if (op->estate) {
-		alist = op->estate->addresses;
-		if (op->call_responded &&
-		    op->addr_index != READ_ONCE(alist->preferred) &&
-		    test_bit(alist->preferred, &op->addr_tried))
-			WRITE_ONCE(alist->preferred, op->addr_index);
-		op->estate = NULL;
-	}
+	op->estate = NULL;
 	_leave(" = f [failed %d]", afs_op_error(op));
 	return false;
 }
--
cgit
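A minimal userspace model of the selection change may help; it is a sketch, not the kernel code, and the addresses and priorities are invented. Rescanning for the highest-priority address on every pass (as the fixed loop does after resetting best_prio and addr_index) means an explicitly prioritised address cannot be displaced merely because another address happened to respond first.

/*
 * Userspace model (not the kernel implementation) of the fixed selection:
 * always rescan the whole list for the highest priority instead of
 * consulting a cached "preferred" index.
 */
#include <stdio.h>

struct addr {
	const char *name;
	int prio;	/* higher wins */
};

static int pick_best(const struct addr *addrs, int n)
{
	int best_prio = -1, addr_index = 0, i;

	/* Mirrors the fixed loop: reset the state, then scan every address. */
	for (i = 0; i < n; i++) {
		if (addrs[i].prio > best_prio) {
			addr_index = i;
			best_prio = addrs[i].prio;
		}
	}
	return addr_index;
}

int main(void)
{
	struct addr alist[] = {
		{ "192.0.2.1", 10 },	/* answered first on a previous call */
		{ "192.0.2.2", 30 },	/* explicitly preferred address */
	};

	/* Rescanning keeps the explicit preference in charge every time. */
	printf("picked: %s\n", alist[pick_best(alist, 2)].name);
	return 0;
}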
From b74c02a37987d3ea755f96119c527f5e91950592 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 13 Mar 2024 08:15:03 +0000
Subject: afs: Fix occasional rmdir-then-VNOVNODE with generic/011

Sometimes generic/011 causes kafs to follow up an FS.RemoveDir RPC call
by spending around a second sending a slew of FS.FetchStatus RPC calls
to the directory just deleted that then abort with VNOVNODE, indicating
deletion of the target directory.

This seems to stem from userspace attempting to stat the directory or
something in it:

	afs_select_fileserver+0x46d/0xaa2
	afs_wait_for_operation+0x12/0x17e
	afs_fetch_status+0x56/0x75
	afs_validate+0xfb/0x240
	afs_permission+0xef/0x1b0
	inode_permission+0x90/0x139
	link_path_walk.part.0.constprop.0+0x6f/0x2f0
	path_lookupat+0x4c/0xfa
	filename_lookup+0x63/0xd7
	vfs_statx+0x62/0x13f
	vfs_fstatat+0x72/0x8a

The issue appears to be that afs_dir_remove_subdir() marks the callback
promise as being cancelled by setting the expiry time to
AFS_NO_CB_PROMISE.  This confuses afs_validate(), which sends the
FetchStatus to try to get a new promise before it checks the
AFS_VNODE_DELETED flag that indicates we already know the directory got
deleted.

Fix this by:

 (1) Making afs_check_validity() return true if AFS_VNODE_DELETED is
     set, and then tweaking the return from afs_validate() if the
     DELETED flag is set.

 (2) Moving the AFS_VNODE_DELETED check in afs_validate() up above the
     expiration check, to immediately after we've grabbed the
     validate_lock.

Fixes: 453924de6212 ("afs: Overhaul invalidation handling to better support RO volumes")
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/20240313081505.3060173-3-dhowells@redhat.com
Reviewed-by: Marc Dionne
cc: Marc Dionne
cc: linux-afs@lists.infradead.org
Signed-off-by: Christian Brauner
---
 fs/afs/validation.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/afs/validation.c b/fs/afs/validation.c
index 46b37f2cce7d..32a53fc8dfb2 100644
--- a/fs/afs/validation.c
+++ b/fs/afs/validation.c
@@ -122,6 +122,9 @@ bool afs_check_validity(const struct afs_vnode *vnode)
 	const struct afs_volume *volume = vnode->volume;
 	time64_t deadline = ktime_get_real_seconds() + 10;
 
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		return true;
+
 	if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
 	    atomic64_read(&vnode->cb_expires_at) <= deadline ||
 	    volume->cb_expires_at <= deadline ||
@@ -389,12 +392,17 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	       key_serial(key));
 
 	if (afs_check_validity(vnode))
-		return 0;
+		return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
 
 	ret = down_write_killable(&vnode->validate_lock);
 	if (ret < 0)
 		goto error;
 
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		ret = -ESTALE;
+		goto error_unlock;
+	}
+
 	/* Validate a volume after the v_break has changed or the volume
 	 * callback expired.  We only want to do this once per volume per
 	 * v_break change.  The actual work will be done when parsing the
@@ -448,12 +456,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	vnode->cb_ro_snapshot = cb_ro_snapshot;
 	vnode->cb_scrub = cb_scrub;
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_debug("file already deleted");
-		ret = -ESTALE;
-		goto error_unlock;
-	}
-
 	/* if the vnode's data version number changed then its contents are
 	 * different */
 	zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
--
cgit
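The ordering is the crux: the deleted check has to run before the expiry check, or a vnode whose callback promise was deliberately cancelled will still trigger a server round trip. Below is a small userspace model of that control flow; the flags and the stand-in RPC are invented, and it is a sketch of the logic, not the kernel code.

/*
 * Userspace model of the reordering: a vnode known to be gone returns
 * -ESTALE immediately instead of triggering a doomed status fetch.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct vnode {
	bool deleted;		/* models AFS_VNODE_DELETED */
	bool promise_expired;	/* models cb_expires_at == AFS_NO_CB_PROMISE */
};

static int fetch_status(struct vnode *v)
{
	/* Stand-in for the FS.FetchStatus RPC that used to be sent anyway. */
	printf("FS.FetchStatus issued\n");
	return v->deleted ? -ESTALE : 0;	/* server would abort VNOVNODE */
}

static int validate(struct vnode *v)
{
	if (v->deleted)			/* checked first after the fix */
		return -ESTALE;
	if (v->promise_expired)		/* only now do we go to the server */
		return fetch_status(v);
	return 0;
}

int main(void)
{
	struct vnode v = { .deleted = true, .promise_expired = true };

	/* With the fix, no RPC line is printed; -ESTALE comes straight back. */
	printf("validate = %d\n", validate(&v));
	return 0;
}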
From 59a55a63c24624c7ad268f12c8f82d142ef6a6d4 Mon Sep 17 00:00:00 2001
From: Christian Brauner
Date: Thu, 14 Mar 2024 15:24:13 +0100
Subject: fs,block: get holder during claim

Now that we open block devices as files, we need to deal with the
reality that closing is a deferred operation.  An operation on the block
device, such as freeze, thaw, or removal, that runs concurrently with
umount tries to acquire a stable reference on the holder.  The holder
might already be gone, though.  Make this reliable by grabbing a passive
reference to the holder during bdev_open() and releasing it during
bdev_release().

Fixes: f3a608827d1f ("bdev: open block device as files") # mainline only
Reported-by: Christoph Hellwig
Link: https://lore.kernel.org/r/ZfEQQ9jZZVes0WCZ@infradead.org
Reviewed-by: Jan Kara
Reviewed-by: Christoph Hellwig
Tested-by: Yi Zhang
Reported-by: https://lore.kernel.org/r/CAHj4cs8tbDwKRwfS1=DmooP73ysM__xAb2PQc6XsAmWR+VuYmg@mail.gmail.com
Link: https://lore.kernel.org/r/20240315-freibad-annehmbar-ca68c375af91@brauner
Signed-off-by: Christian Brauner
---
 block/bdev.c           |  7 +++++++
 fs/super.c             | 18 ++++++++++++++++++
 include/linux/blkdev.h | 10 ++++++++++
 3 files changed, 35 insertions(+)

diff --git a/block/bdev.c b/block/bdev.c
index e7adaaf1c219..7a5f611c3d2e 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -583,6 +583,9 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder,
 	mutex_unlock(&bdev->bd_holder_lock);
 	bd_clear_claiming(whole, holder);
 	mutex_unlock(&bdev_lock);
+
+	if (hops && hops->get_holder)
+		hops->get_holder(holder);
 }
 
 /**
@@ -605,6 +608,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
 static void bd_end_claim(struct block_device *bdev, void *holder)
 {
 	struct block_device *whole = bdev_whole(bdev);
+	const struct blk_holder_ops *hops = bdev->bd_holder_ops;
 	bool unblock = false;
 
 	/*
@@ -627,6 +631,9 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
 	whole->bd_holder = NULL;
 	mutex_unlock(&bdev_lock);
 
+	if (hops && hops->put_holder)
+		hops->put_holder(holder);
+
 	/*
 	 * If this was the last claim, remove holder link and unblock evpoll if
 	 * it was a write holder.
diff --git a/fs/super.c b/fs/super.c
index ee05ab6b37e7..71d9779c42b1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1515,11 +1515,29 @@ static int fs_bdev_thaw(struct block_device *bdev)
 	return error;
 }
 
+static void fs_bdev_super_get(void *data)
+{
+	struct super_block *sb = data;
+
+	spin_lock(&sb_lock);
+	sb->s_count++;
+	spin_unlock(&sb_lock);
+}
+
+static void fs_bdev_super_put(void *data)
+{
+	struct super_block *sb = data;
+
+	put_super(sb);
+}
+
 const struct blk_holder_ops fs_holder_ops = {
 	.mark_dead		= fs_bdev_mark_dead,
 	.sync			= fs_bdev_sync,
 	.freeze			= fs_bdev_freeze,
 	.thaw			= fs_bdev_thaw,
+	.get_holder		= fs_bdev_super_get,
+	.put_holder		= fs_bdev_super_put,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f9b87c39cab0..c3e8f7cf96be 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1505,6 +1505,16 @@ struct blk_holder_ops {
 	 * Thaw the file system mounted on the block device.
 	 */
 	int (*thaw)(struct block_device *bdev);
+
+	/*
+	 * If needed, get a reference to the holder.
+	 */
+	void (*get_holder)(void *holder);
+
+	/*
+	 * Release the holder.
+	 */
+	void (*put_holder)(void *holder);
 };
 
 /*
--
cgit
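The protocol the two new hooks implement can be modelled in a few lines of userspace C. This is a sketch with invented types, not the kernel API: the point is that the opener pins the holder for the whole open/release window, so a concurrent freeze or thaw can always take a stable reference, and that both hooks stay optional, as the hops checks in bd_finish_claiming() and bd_end_claim() show.

/*
 * Userspace model of the holder-reference protocol (invented types).
 * "Open" takes a passive reference on the holder; "release" drops it.
 */
#include <stdio.h>

struct holder {
	int refs;	/* models sb->s_count */
};

struct holder_ops {
	void (*get_holder)(void *holder);
	void (*put_holder)(void *holder);
};

static void holder_get(void *h) { ((struct holder *)h)->refs++; }
static void holder_put(void *h) { ((struct holder *)h)->refs--; }

static const struct holder_ops ops = {
	.get_holder = holder_get,
	.put_holder = holder_put,
};

/* The ops stay optional, mirroring the hops checks in the patch. */
static void model_bdev_open(struct holder *h, const struct holder_ops *hops)
{
	if (hops && hops->get_holder)
		hops->get_holder(h);
}

static void model_bdev_release(struct holder *h, const struct holder_ops *hops)
{
	if (hops && hops->put_holder)
		hops->put_holder(h);
}

int main(void)
{
	struct holder sb = { .refs = 1 };

	model_bdev_open(&sb, &ops);	/* holder pinned while the bdev is open */
	printf("refs while open: %d\n", sb.refs);	/* 2 */
	model_bdev_release(&sb, &ops);
	printf("refs after release: %d\n", sb.refs);	/* 1 */
	return 0;
}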
From 449ac5514631dd9b9b66dd708dd5beb1428e2812 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Fri, 15 Mar 2024 14:48:26 +0000
Subject: fscache: Fix error handling in fscache_begin_operation()

Fix fscache_begin_operation() to clear cres->cache_priv on error,
otherwise fscache_resources_valid() will report it as being valid.

Signed-off-by: David Howells
Link: https://lore.kernel.org/r/3933237.1710514106@warthog.procyon.org.uk
Reviewed-by: Jeff Layton
Reported-by: Marc Dionne
cc: Jeff Layton
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner
---
 fs/netfs/fscache_io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index ad572f7ee897..43a651ed8264 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -83,8 +83,10 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres,
 	cres->debug_id		= cookie->debug_id;
 	cres->inval_counter	= cookie->inval_counter;
 
-	if (!fscache_begin_cookie_access(cookie, why))
+	if (!fscache_begin_cookie_access(cookie, why)) {
+		cres->cache_priv = NULL;
 		return -ENOBUFS;
+	}
 
 again:
 	spin_lock(&cookie->lock);
--
cgit
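The invariant being restored is small but easy to model: once cache_priv has been set, every failure exit must clear it again, or a later fscache_resources_valid() check will treat the resources as live. A userspace sketch with invented types, not the netfs API:

/*
 * Userspace model of the error-path invariant: on failure, the resources
 * must not be left pointing at the cookie.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct cache_resources {
	void *cache_priv;	/* non-NULL means "valid" to the checker */
};

static bool resources_valid(const struct cache_resources *cres)
{
	return cres->cache_priv != NULL;	/* models fscache_resources_valid() */
}

static int begin_operation(struct cache_resources *cres, void *cookie,
			   bool access_ok)
{
	cres->cache_priv = cookie;
	if (!access_ok) {
		cres->cache_priv = NULL;	/* the line the fix adds */
		return -ENOBUFS;
	}
	return 0;
}

int main(void)
{
	struct cache_resources cres;
	int cookie = 42;
	int err = begin_operation(&cres, &cookie, false);

	/* With the fix: err is nonzero and valid prints 0. */
	printf("err=%d valid=%d\n", err, resources_valid(&cres));
	return 0;
}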