summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.c2
-rw-r--r--fs/btrfs/extent_io.c29
-rw-r--r--fs/btrfs/extent_map.c83
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/ordered-data.c12
-rw-r--r--fs/btrfs/qgroup.c11
-rw-r--r--fs/btrfs/tests/delayed-refs-tests.c1
-rw-r--r--fs/btrfs/transaction.c4
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/erofs/zdata.c2
-rw-r--r--fs/file_table.c16
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c13
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/iomap/direct-io.c8
-rw-r--r--fs/namei.c24
-rw-r--r--fs/namespace.c54
-rw-r--r--fs/netfs/buffered_read.c19
-rw-r--r--fs/netfs/internal.h4
-rw-r--r--fs/netfs/read_collect.c6
-rw-r--r--fs/netfs/read_retry.c43
-rw-r--r--fs/netfs/stats.c9
-rw-r--r--fs/netfs/write_issue.c1
-rw-r--r--fs/netfs/write_retry.c2
-rw-r--r--fs/nfs/delegation.c37
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/direct.c23
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfsd/filecache.c11
-rw-r--r--fs/nfsd/nfs2acl.c2
-rw-r--r--fs/nfsd/nfs3acl.c2
-rw-r--r--fs/nfsd/nfs4callback.c9
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/nfsfh.c5
-rw-r--r--fs/notify/fsnotify.c18
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/open.c11
-rw-r--r--fs/overlayfs/copy_up.c2
-rw-r--r--fs/pidfs.c13
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/proc/vmcore.c5
-rw-r--r--fs/smb/client/cifsglob.h20
-rw-r--r--fs/smb/client/dfs.c30
-rw-r--r--fs/smb/client/dfs.h7
-rw-r--r--fs/smb/client/dfs_cache.c27
-rw-r--r--fs/smb/client/file.c7
-rw-r--r--fs/smb/client/inode.c17
-rw-r--r--fs/smb/client/reparse.c5
-rw-r--r--fs/smb/client/reparse.h28
-rw-r--r--fs/smb/client/smb1ops.c2
-rw-r--r--fs/smb/client/smb2inode.c4
-rw-r--r--fs/smb/client/smb2ops.c25
-rw-r--r--fs/smb/client/smb2pdu.c4
-rw-r--r--fs/smb/client/smb2proto.h2
-rw-r--r--fs/smb/common/smb2pdu.h30
-rw-r--r--fs/smb/common/smbfsctl.h3
-rw-r--r--fs/stat.c4
-rw-r--r--fs/vboxsf/super.c3
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c13
-rw-r--r--fs/xfs/scrub/common.h5
-rw-r--r--fs/xfs/scrub/inode_repair.c12
-rw-r--r--fs/xfs/scrub/repair.h11
-rw-r--r--fs/xfs/scrub/scrub.c12
-rw-r--r--fs/xfs/xfs_aops.c41
-rw-r--r--fs/xfs/xfs_buf.c36
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_exchrange.c71
-rw-r--r--fs/xfs/xfs_inode.c7
-rw-r--r--fs/xfs/xfs_iomap.c6
-rw-r--r--fs/xfs/xfs_qm_bhv.c55
-rw-r--r--fs/xfs/xfs_super.c8
75 files changed, 712 insertions, 326 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 92071ca0655f..3dc5a35dd19b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (!p->skip_locking) {
btrfs_unlock_up_safe(p, parent_level + 1);
+ btrfs_maybe_reset_lockdep_class(root, tmp);
tmp_locked = true;
btrfs_tree_read_lock(tmp);
btrfs_release_path(p);
@@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (!p->skip_locking) {
ASSERT(ret == -EAGAIN);
+ btrfs_maybe_reset_lockdep_class(root, tmp);
tmp_locked = true;
btrfs_tree_read_lock(tmp);
btrfs_release_path(p);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a2cac9d0a1a9..b2fae67f8fa3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -523,8 +523,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
u64 end;
u32 len;
- /* For now only order 0 folios are supported for data. */
- ASSERT(folio_order(folio) == 0);
btrfs_debug(fs_info,
"%s: bi_sector=%llu, err=%d, mirror=%u",
__func__, bio->bi_iter.bi_sector, bio->bi_status,
@@ -552,7 +550,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> folio_shift(folio);
/*
* Zero out the remaining part if this range straddles
@@ -561,9 +558,11 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* Here we should only zero the range inside the folio,
* not touch anything else.
*
- * NOTE: i_size is exclusive while end is inclusive.
+ * NOTE: i_size is exclusive while end is inclusive and
+ * folio_contains() takes PAGE_SIZE units.
*/
- if (folio_index(folio) == end_index && i_size <= end) {
+ if (folio_contains(folio, i_size >> PAGE_SHIFT) &&
+ i_size <= end) {
u32 zero_start = max(offset_in_folio(folio, i_size),
offset_in_folio(folio, start));
u32 zero_len = offset_in_folio(folio, end) + 1 -
@@ -899,7 +898,6 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
u64 len, struct extent_map **em_cached)
{
struct extent_map *em;
- struct extent_state *cached_state = NULL;
ASSERT(em_cached);
@@ -915,14 +913,12 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
*em_cached = NULL;
}
- btrfs_lock_and_flush_ordered_range(inode, start, start + len - 1, &cached_state);
em = btrfs_get_extent(inode, folio, start, len);
if (!IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
}
- unlock_extent(&inode->io_tree, start, start + len - 1, &cached_state);
return em;
}
@@ -956,7 +952,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
return ret;
}
- if (folio->index == last_byte >> folio_shift(folio)) {
+ if (folio_contains(folio, last_byte >> PAGE_SHIFT)) {
size_t zero_offset = offset_in_folio(folio, last_byte);
if (zero_offset) {
@@ -1079,11 +1075,18 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
int btrfs_read_folio(struct file *file, struct folio *folio)
{
+ struct btrfs_inode *inode = folio_to_inode(folio);
+ const u64 start = folio_pos(folio);
+ const u64 end = start + folio_size(folio) - 1;
+ struct extent_state *cached_state = NULL;
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
struct extent_map *em_cached = NULL;
int ret;
+ btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state);
ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
+ unlock_extent(&inode->io_tree, start, end, &cached_state);
+
free_extent_map(em_cached);
/*
@@ -2380,12 +2383,20 @@ void btrfs_readahead(struct readahead_control *rac)
{
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD };
struct folio *folio;
+ struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
+ const u64 start = readahead_pos(rac);
+ const u64 end = start + readahead_length(rac) - 1;
+ struct extent_state *cached_state = NULL;
struct extent_map *em_cached = NULL;
u64 prev_em_start = (u64)-1;
+ btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state);
+
while ((folio = readahead_folio(rac)) != NULL)
btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
+ unlock_extent(&inode->io_tree, start, end, &cached_state);
+
if (em_cached)
free_extent_map(em_cached);
submit_one_bio(&bio_ctrl);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 67ce85ff0ae2..7f46abbd6311 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1128,6 +1128,8 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
long nr_dropped = 0;
struct rb_node *node;
+ lockdep_assert_held_write(&tree->lock);
+
/*
* Take the mmap lock so that we serialize with the inode logging phase
* of fsync because we may need to set the full sync flag on the inode,
@@ -1139,28 +1141,12 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
* to find new extents, which may not be there yet because ordered
* extents haven't completed yet.
*
- * We also do a try lock because otherwise we could deadlock. This is
- * because the shrinker for this filesystem may be invoked while we are
- * in a path that is holding the mmap lock in write mode. For example in
- * a reflink operation while COWing an extent buffer, when allocating
- * pages for a new extent buffer and under memory pressure, the shrinker
- * may be invoked, and therefore we would deadlock by attempting to read
- * lock the mmap lock while we are holding already a write lock on it.
+ * We also do a try lock because we don't want to block for too long and
+ * we are holding the extent map tree's lock in write mode.
*/
if (!down_read_trylock(&inode->i_mmap_lock))
return 0;
- /*
- * We want to be fast so if the lock is busy we don't want to spend time
- * waiting for it - either some task is about to do IO for the inode or
- * we may have another task shrinking extent maps, here in this code, so
- * skip this inode.
- */
- if (!write_trylock(&tree->lock)) {
- up_read(&inode->i_mmap_lock);
- return 0;
- }
-
node = rb_first(&tree->root);
while (node) {
struct rb_node *next = rb_next(node);
@@ -1201,12 +1187,61 @@ next:
break;
node = next;
}
- write_unlock(&tree->lock);
up_read(&inode->i_mmap_lock);
return nr_dropped;
}
+static struct btrfs_inode *find_first_inode_to_shrink(struct btrfs_root *root,
+ u64 min_ino)
+{
+ struct btrfs_inode *inode;
+ unsigned long from = min_ino;
+
+ xa_lock(&root->inodes);
+ while (true) {
+ struct extent_map_tree *tree;
+
+ inode = xa_find(&root->inodes, &from, ULONG_MAX, XA_PRESENT);
+ if (!inode)
+ break;
+
+ tree = &inode->extent_tree;
+
+ /*
+ * We want to be fast so if the lock is busy we don't want to
+ * spend time waiting for it (some task is about to do IO for
+ * the inode).
+ */
+ if (!write_trylock(&tree->lock))
+ goto next;
+
+ /*
+ * Skip inode if it doesn't have loaded extent maps, so we avoid
+ * getting a reference and doing an iput later. This includes
+ * cases like files that were opened for things like stat(2), or
+ * files with all extent maps previously released through the
+ * release folio callback (btrfs_release_folio()) or released in
+ * a previous run, or directories which never have extent maps.
+ */
+ if (RB_EMPTY_ROOT(&tree->root)) {
+ write_unlock(&tree->lock);
+ goto next;
+ }
+
+ if (igrab(&inode->vfs_inode))
+ break;
+
+ write_unlock(&tree->lock);
+next:
+ from = btrfs_ino(inode) + 1;
+ cond_resched_lock(&root->inodes.xa_lock);
+ }
+ xa_unlock(&root->inodes);
+
+ return inode;
+}
+
static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1214,21 +1249,21 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx
long nr_dropped = 0;
u64 min_ino = fs_info->em_shrinker_last_ino + 1;
- inode = btrfs_find_first_inode(root, min_ino);
+ inode = find_first_inode_to_shrink(root, min_ino);
while (inode) {
nr_dropped += btrfs_scan_inode(inode, ctx);
+ write_unlock(&inode->extent_tree.lock);
min_ino = btrfs_ino(inode) + 1;
fs_info->em_shrinker_last_ino = btrfs_ino(inode);
- btrfs_add_delayed_iput(inode);
+ iput(&inode->vfs_inode);
- if (ctx->scanned >= ctx->nr_to_scan ||
- btrfs_fs_closing(inode->root->fs_info))
+ if (ctx->scanned >= ctx->nr_to_scan || btrfs_fs_closing(fs_info))
break;
cond_resched();
- inode = btrfs_find_first_inode(root, min_ino);
+ inode = find_first_inode_to_shrink(root, min_ino);
}
if (inode) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..0b568c8d24cb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
- loff_t start_pos;
/*
* Quickly bail out on NOWAIT writes if we don't have the nodatacow or
@@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
inode_inc_iversion(inode);
}
- start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
- if (start_pos > oldsize) {
+ if (pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
@@ -1092,7 +1090,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
u64 lockend;
size_t num_written = 0;
ssize_t ret;
- loff_t old_isize = i_size_read(inode);
+ loff_t old_isize;
unsigned int ilock_flags = 0;
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0);
@@ -1105,6 +1103,13 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
if (ret < 0)
return ret;
+ /*
+ * We can only trust the isize with inode lock held, or it can race with
+ * other buffered writes and cause incorrect call of
+ * pagecache_isize_extended() to overwrite existing data.
+ */
+ old_isize = i_size_read(inode);
+
ret = generic_write_checks(iocb, i);
if (ret <= 0)
goto out;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 30eceaf829a7..4aca7475fd82 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
*/
if (WARN_ON_ONCE(len >= ordered->num_bytes))
return ERR_PTR(-EINVAL);
+ /*
+ * If our ordered extent had an error there's no point in continuing.
+ * The error may have come from a transaction abort done either by this
+ * task or some other concurrent task, and the transaction abort path
+ * iterates over all existing ordered extents and sets the flag
+ * BTRFS_ORDERED_IOERR on them.
+ */
+ if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) {
+ const int fs_error = BTRFS_FS_ERROR(fs_info);
+
+ return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO);
+ }
/* We cannot split partially completed ordered extents. */
if (ordered->bytes_left) {
ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b90fabe302e6..f9d3766c809b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su
* Commit current transaction to make sure all the rfer/excl numbers
* get updated.
*/
- trans = btrfs_start_transaction(fs_info->quota_root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = btrfs_commit_transaction(trans);
+ ret = btrfs_commit_current_transaction(fs_info->quota_root);
if (ret < 0)
return ret;
@@ -1897,8 +1893,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su
/*
* It's squota and the subvolume still has numbers needed for future
* accounting, in this case we can not delete it. Just skip it.
+ *
+ * Or the qgroup is already removed by a qgroup rescan. For both cases we're
+ * safe to ignore them.
*/
- if (ret == -EBUSY)
+ if (ret == -EBUSY || ret == -ENOENT)
ret = 0;
return ret;
}
diff --git a/fs/btrfs/tests/delayed-refs-tests.c b/fs/btrfs/tests/delayed-refs-tests.c
index 6558508c2ddf..265370e79a54 100644
--- a/fs/btrfs/tests/delayed-refs-tests.c
+++ b/fs/btrfs/tests/delayed-refs-tests.c
@@ -1009,6 +1009,7 @@ int btrfs_test_delayed_refs(u32 sectorsize, u32 nodesize)
if (!ret)
ret = select_delayed_refs_test(&trans);
+ kfree(transaction);
out_free_fs_info:
btrfs_free_dummy_fs_info(fs_info);
return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 15312013f2a3..aca83a98b75a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -274,8 +274,10 @@ loop:
cur_trans = fs_info->running_transaction;
if (cur_trans) {
if (TRANS_ABORTED(cur_trans)) {
+ const int abort_error = cur_trans->aborted;
+
spin_unlock(&fs_info->trans_lock);
- return cur_trans->aborted;
+ return abort_error;
}
if (btrfs_blocked_trans_types[cur_trans->state] & type) {
spin_unlock(&fs_info->trans_lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0a0776489055..fb22d4425cb0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7200,8 +7200,12 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
fs_devices = find_fsid(fsid, NULL);
if (!fs_devices) {
- if (!btrfs_test_opt(fs_info, DEGRADED))
+ if (!btrfs_test_opt(fs_info, DEGRADED)) {
+ btrfs_err(fs_info,
+ "failed to find fsid %pU when attempting to open seed devices",
+ fsid);
return ERR_PTR(-ENOENT);
+ }
fs_devices = alloc_fs_devices(fsid);
if (IS_ERR(fs_devices))
diff --git a/fs/dcache.c b/fs/dcache.c
index 9cc0d47da321..e3634916ffb9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1700,7 +1700,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
dentry->d_flags = 0;
- lockref_init(&dentry->d_lockref, 1);
+ lockref_init(&dentry->d_lockref);
seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = dentry;
@@ -2966,11 +2966,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias)
goto out_err;
m2 = &alias->d_parent->d_inode->i_rwsem;
out_unalias:
- if (alias->d_op->d_unalias_trylock &&
+ if (alias->d_op && alias->d_op->d_unalias_trylock &&
!alias->d_op->d_unalias_trylock(alias))
goto out_err;
__d_move(alias, dentry, false);
- if (alias->d_op->d_unalias_unlock)
+ if (alias->d_op && alias->d_op->d_unalias_unlock)
alias->d_op->d_unalias_unlock(alias);
ret = 0;
out_err:
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 29f8963bb523..d771e06db738 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -726,7 +726,7 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- lockref_init(&pcl->lockref, 1); /* one ref for this request */
+ lockref_init(&pcl->lockref); /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
diff --git a/fs/file_table.c b/fs/file_table.c
index f0291a66f9db..5c00dc38558d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
* refcount bumps we should reinitialize the reused file first.
*/
file_ref_init(&f->f_ref, 1);
+ /*
+ * Disable permission and pre-content events for all files by default.
+ * They may be enabled later by file_set_fsnotify_mode_from_watchers().
+ */
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM);
return 0;
}
@@ -375,7 +380,13 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
if (IS_ERR(file)) {
ihold(inode);
path_put(&path);
+ return file;
}
+ /*
+ * Disable all fsnotify events for pseudo files by default.
+ * They may be enabled by caller with file_set_fsnotify_mode().
+ */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY);
return file;
}
EXPORT_SYMBOL(alloc_file_pseudo);
@@ -400,6 +411,11 @@ struct file *alloc_file_pseudo_noaccount(struct inode *inode,
return file;
}
file_init_path(file, &path, fops);
+ /*
+ * Disable all fsnotify events for pseudo files by default.
+ * They may be enabled by caller with file_set_fsnotify_mode().
+ */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY);
return file;
}
EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5b5f789b37eb..2b2d1b755544 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -838,6 +838,12 @@ static int fuse_check_folio(struct folio *folio)
return 0;
}
+/*
+ * Attempt to steal a page from the splice() pipe and move it into the
+ * pagecache. If successful, the pointer in @pagep will be updated. The
+ * folio that was originally in @pagep will lose a reference and the new
+ * folio returned in @pagep will carry a reference.
+ */
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
int err;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 198862b086ff..3805f9b06c9d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1636,7 +1636,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
goto out_err;
if (fc->cache_symlinks)
- return page_get_link(dentry, inode, callback);
+ return page_get_link_raw(dentry, inode, callback);
err = -ECHILD;
if (!dentry)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7d92a5479998..d63e56fd3dd2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -955,8 +955,10 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_invalidate_atime(inode);
}
- for (i = 0; i < ap->num_folios; i++)
+ for (i = 0; i < ap->num_folios; i++) {
folio_end_read(ap->folios[i], !err);
+ folio_put(ap->folios[i]);
+ }
if (ia->ff)
fuse_file_put(ia->ff, false);
@@ -1048,7 +1050,14 @@ static void fuse_readahead(struct readahead_control *rac)
ap = &ia->ap;
while (ap->num_folios < cur_pages) {
- folio = readahead_folio(rac);
+ /*
+ * This returns a folio with a ref held on it.
+ * The ref needs to be held until the request is
+ * completed, since the splice case (see
+ * fuse_try_move_page()) drops the ref after it's
+ * replaced in the page cache.
+ */
+ folio = __readahead_folio(rac);
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].length = folio_size(folio);
ap->num_folios++;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8c4c1f871a88..65c07aa95718 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1201,8 +1201,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
if (glops->go_instantiate)
gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED);
gl->gl_name = name;
+ lockref_init(&gl->gl_lockref);
lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
- gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 04cadc02e5a6..0727f60ad028 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
- spin_lock_init(&gl->gl_lockref.lock);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 58bc5013ca49..2298e06797ac 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -236,7 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str
return NULL;
qd->qd_sbd = sdp;
- lockref_init(&qd->qd_lockref, 0);
+ lockref_init(&qd->qd_lockref);
qd->qd_id = qid;
qd->qd_slot = -1;
INIT_LIST_HEAD(&qd->qd_lru);
@@ -297,7 +297,6 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
spin_lock_bucket(hash);
*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
if (qd == NULL) {
- new_qd->qd_lockref.count++;
*qdp = new_qd;
list_add(&new_qd->qd_list, &sdp->sd_quota_list);
hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
@@ -1450,6 +1449,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
if (qd == NULL)
goto fail_brelse;
+ qd->qd_lockref.count = 0;
set_bit(QDF_CHANGE, &qd->qd_flags);
qd->qd_change = qc_change;
qd->qd_slot = slot;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b521eb15759e..0e47da82b0c2 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -427,12 +427,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
bio_put(bio);
goto zero_tail;
}
- if (dio->flags & IOMAP_DIO_WRITE) {
+ if (dio->flags & IOMAP_DIO_WRITE)
task_io_account_write(n);
- } else {
- if (dio->flags & IOMAP_DIO_DIRTY)
- bio_set_pages_dirty(bio);
- }
+ else if (dio->flags & IOMAP_DIO_DIRTY)
+ bio_set_pages_dirty(bio);
dio->size += n;
copied += n;
diff --git a/fs/namei.c b/fs/namei.c
index 3ab9440c5b93..ecb7b95c2ca3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -5356,10 +5356,9 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
EXPORT_SYMBOL(vfs_get_link);
/* get the link contents into pagecache */
-const char *page_get_link(struct dentry *dentry, struct inode *inode,
- struct delayed_call *callback)
+static char *__page_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
{
- char *kaddr;
struct page *page;
struct address_space *mapping = inode->i_mapping;
@@ -5378,8 +5377,23 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode,
}
set_delayed_call(callback, page_put_link, page);
BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
- kaddr = page_address(page);
- nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
+ return page_address(page);
+}
+
+const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ return __page_get_link(dentry, inode, callback);
+}
+EXPORT_SYMBOL_GPL(page_get_link_raw);
+
+const char *page_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ char *kaddr = __page_get_link(dentry, inode, callback);
+
+ if (!IS_ERR(kaddr))
+ nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index a3ed3f2980cb..8f1000f9f3df 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5087,30 +5087,29 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
{
struct vfsmount *mnt = s->mnt;
struct super_block *sb = mnt->mnt_sb;
+ size_t start = seq->count;
int err;
- if (sb->s_op->show_options) {
- size_t start = seq->count;
-
- err = security_sb_show_options(seq, sb);
- if (err)
- return err;
+ err = security_sb_show_options(seq, sb);
+ if (err)
+ return err;
+ if (sb->s_op->show_options) {
err = sb->s_op->show_options(seq, mnt->mnt_root);
if (err)
return err;
+ }
- if (unlikely(seq_has_overflowed(seq)))
- return -EAGAIN;
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
- if (seq->count == start)
- return 0;
+ if (seq->count == start)
+ return 0;
- /* skip leading comma */
- memmove(seq->buf + start, seq->buf + start + 1,
- seq->count - start - 1);
- seq->count--;
- }
+ /* skip leading comma */
+ memmove(seq->buf + start, seq->buf + start + 1,
+ seq->count - start - 1);
+ seq->count--;
return 0;
}
@@ -5191,39 +5190,45 @@ static int statmount_string(struct kstatmount *s, u64 flag)
size_t kbufsize;
struct seq_file *seq = &s->seq;
struct statmount *sm = &s->sm;
- u32 start = seq->count;
+ u32 start, *offp;
+
+ /* Reserve an empty string at the beginning for any unset offsets */
+ if (!seq->count)
+ seq_putc(seq, 0);
+
+ start = seq->count;
switch (flag) {
case STATMOUNT_FS_TYPE:
- sm->fs_type = start;
+ offp = &sm->fs_type;
ret = statmount_fs_type(s, seq);
break;
case STATMOUNT_MNT_ROOT:
- sm->mnt_root = start;
+ offp = &sm->mnt_root;
ret = statmount_mnt_root(s, seq);
break;
case STATMOUNT_MNT_POINT:
- sm->mnt_point = start;
+ offp = &sm->mnt_point;
ret = statmount_mnt_point(s, seq);
break;
case STATMOUNT_MNT_OPTS:
- sm->mnt_opts = start;
+ offp = &sm->mnt_opts;
ret = statmount_mnt_opts(s, seq);
break;
case STATMOUNT_OPT_ARRAY:
- sm->opt_array = start;
+ offp = &sm->opt_array;
ret = statmount_opt_array(s, seq);
break;
case STATMOUNT_OPT_SEC_ARRAY:
- sm->opt_sec_array = start;
+ offp = &sm->opt_sec_array;
ret = statmount_opt_sec_array(s, seq);
break;
case STATMOUNT_FS_SUBTYPE:
- sm->fs_subtype = start;
+ offp = &sm->fs_subtype;
statmount_fs_subtype(s, seq);
break;
case STATMOUNT_SB_SOURCE:
- sm->sb_source = start;
+ offp = &sm->sb_source;
ret = statmount_sb_source(s, seq);
break;
default:
@@ -5251,6 +5256,7 @@ static int statmount_string(struct kstatmount *s, u64 flag)
seq->buf[seq->count++] = '\0';
sm->mask |= flag;
+ *offp = start;
return 0;
}
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index f761d44b3436..0d1b6d35ff3b 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -155,8 +155,9 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}
-static void netfs_issue_read(struct netfs_io_request *rreq,
- struct netfs_io_subrequest *subreq)
+static void netfs_queue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq,
+ bool last_subreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -177,8 +178,17 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
}
}
+ if (last_subreq) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+
spin_unlock(&rreq->lock);
+}
+static void netfs_issue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
switch (subreq->source) {
case NETFS_DOWNLOAD_FROM_SERVER:
rreq->netfs_ops->issue_read(subreq);
@@ -293,11 +303,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
}
size -= slice;
start += slice;
- if (size <= 0) {
- smp_wmb(); /* Write lists before ALL_QUEUED. */
- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- }
+ netfs_queue_read(rreq, subreq, size <= 0);
netfs_issue_read(rreq, subreq);
cond_resched();
} while (size > 0);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eb76f98c894b..1c4f953c3d68 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -135,6 +135,8 @@ extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
+extern atomic_t netfs_n_rh_retry_read_req;
+extern atomic_t netfs_n_rh_retry_read_subreq;
extern atomic_t netfs_n_wh_buffered_write;
extern atomic_t netfs_n_wh_writethrough;
extern atomic_t netfs_n_wh_dio_write;
@@ -147,6 +149,8 @@ extern atomic_t netfs_n_wh_upload_failed;
extern atomic_t netfs_n_wh_write;
extern atomic_t netfs_n_wh_write_done;
extern atomic_t netfs_n_wh_write_failed;
+extern atomic_t netfs_n_wh_retry_write_req;
+extern atomic_t netfs_n_wh_retry_write_subreq;
extern atomic_t netfs_n_wb_lock_skip;
extern atomic_t netfs_n_wb_lock_wait;
extern atomic_t netfs_n_folioq;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index f65affa5a9e4..636cc5a98ef5 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -470,7 +470,8 @@ void netfs_read_collection_worker(struct work_struct *work)
*/
void netfs_wake_read_collector(struct netfs_io_request *rreq)
{
- if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
+ !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
if (!work_pending(&rreq->work)) {
netfs_get_request(rreq, netfs_rreq_trace_get_work);
if (!queue_work(system_unbound_wq, &rreq->work))
@@ -586,7 +587,8 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
/* If we are at the head of the queue, wake up the collector. */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests))
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
+ test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
netfs_wake_read_collector(rreq);
netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 2290af0d51ac..0f294b26e08c 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -14,7 +14,7 @@ static void netfs_reissue_read(struct netfs_io_request *rreq,
{
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_stat(&netfs_n_rh_retry_read_subreq);
subreq->rreq->netfs_ops->issue_read(subreq);
}
@@ -48,6 +48,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
subreq->retry_count++;
netfs_reset_iter(subreq);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
}
}
@@ -75,7 +76,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
struct iov_iter source;
unsigned long long start, len;
size_t part;
- bool boundary = false;
+ bool boundary = false, subreq_superfluous = false;
/* Go through the subreqs and find the next span of contiguous
* buffer that we then rejig (cifs, for example, needs the
@@ -116,8 +117,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
/* Work through the sublist. */
subreq = from;
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
- if (!len)
+ if (!len) {
+ subreq_superfluous = true;
break;
+ }
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start - subreq->transferred;
subreq->len = len + subreq->transferred;
@@ -154,19 +157,21 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
- if (subreq == to)
+ if (subreq == to) {
+ subreq_superfluous = false;
break;
+ }
}
/* If we managed to use fewer subreqs, we can discard the
* excess; if we used the same number, then we're done.
*/
if (!len) {
- if (subreq == to)
+ if (!subreq_superfluous)
continue;
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
list_del(&subreq->rreq_link);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
if (subreq == to)
@@ -187,14 +192,12 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start;
subreq->len = len;
- subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
subreq->stream_nr = stream->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
@@ -256,14 +259,34 @@ void netfs_retry_reads(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+
+ netfs_stat(&netfs_n_rh_retry_read_req);
+
+ set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ continue;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ for (;;) {
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ break;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
}
+ clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
netfs_retry_read_subrequests(rreq);
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index f1af344266cc..ab6b916addc4 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -29,6 +29,8 @@ atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_rh_retry_read_req;
+atomic_t netfs_n_rh_retry_read_subreq;
atomic_t netfs_n_wh_buffered_write;
atomic_t netfs_n_wh_writethrough;
atomic_t netfs_n_wh_dio_write;
@@ -41,6 +43,8 @@ atomic_t netfs_n_wh_upload_failed;
atomic_t netfs_n_wh_write;
atomic_t netfs_n_wh_write_done;
atomic_t netfs_n_wh_write_failed;
+atomic_t netfs_n_wh_retry_write_req;
+atomic_t netfs_n_wh_retry_write_subreq;
atomic_t netfs_n_wb_lock_skip;
atomic_t netfs_n_wb_lock_wait;
atomic_t netfs_n_folioq;
@@ -81,6 +85,11 @@ int netfs_stats_show(struct seq_file *m, void *v)
atomic_read(&netfs_n_wh_write),
atomic_read(&netfs_n_wh_write_done),
atomic_read(&netfs_n_wh_write_failed));
+ seq_printf(m, "Retries: rq=%u rs=%u wq=%u ws=%u\n",
+ atomic_read(&netfs_n_rh_retry_read_req),
+ atomic_read(&netfs_n_rh_retry_read_subreq),
+ atomic_read(&netfs_n_wh_retry_write_req),
+ atomic_read(&netfs_n_wh_retry_write_subreq));
seq_printf(m, "Objs : rr=%u sr=%u foq=%u wsc=%u\n",
atomic_read(&netfs_n_rh_rreq),
atomic_read(&netfs_n_rh_sreq),
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 69727411683e..77279fc5b5a7 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -253,6 +253,7 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
subreq->retry_count++;
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_stat(&netfs_n_wh_retry_write_subreq);
netfs_do_issue_write(stream, subreq);
}
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index c841a851dd73..545d33079a77 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -203,6 +203,8 @@ void netfs_retry_writes(struct netfs_io_request *wreq)
struct netfs_io_stream *stream;
int s;
+ netfs_stat(&netfs_n_wh_retry_write_req);
+
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 035ba52742a5..4db912f56230 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -781,6 +781,43 @@ int nfs4_inode_return_delegation(struct inode *inode)
}
/**
+ * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called to request that the delegation be returned as soon
+ * as the file is closed. If the file is already closed, the delegation is
+ * immediately returned.
+ */
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *ret = NULL;
+
+ if (!inode)
+ return;
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (!delegation)
+ goto out;
+ spin_lock(&delegation->lock);
+ if (!delegation->inode)
+ goto out_unlock;
+ if (list_empty(&NFS_I(inode)->open_files) &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ } else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out_unlock:
+ spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(inode);
+out:
+ rcu_read_unlock();
+ nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
* nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 71524d34ed20..8ff5ab9c5c25 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -49,6 +49,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
unsigned long pagemod_limit, u32 deleg_type);
int nfs4_inode_return_delegation(struct inode *inode);
void nfs4_inode_return_delegation_on_close(struct inode *inode);
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_evict_delegation(struct inode *inode);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f45beea92d03..f32f8d7c9122 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
@@ -130,6 +131,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
dreq->count = req_start;
}
+static void nfs_direct_file_adjust_size_locked(struct inode *inode,
+ loff_t offset, size_t count)
+{
+ loff_t newsize = offset + (loff_t)count;
+ loff_t oldsize = i_size_read(inode);
+
+ if (newsize > oldsize) {
+ i_size_write(inode, newsize);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
+ trace_nfs_size_grow(inode, newsize);
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+ }
+}
+
/**
* nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
@@ -272,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
nfs_direct_count_bytes(dreq, hdr);
spin_unlock(&dreq->lock);
+ nfs_update_delegated_atime(dreq->inode);
+
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
@@ -741,6 +758,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct inode *inode = dreq->inode;
int flags = NFS_ODIRECT_DONE;
trace_nfs_direct_write_completion(dreq);
@@ -762,6 +780,11 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
}
spin_unlock(&dreq->lock);
+ spin_lock(&inode->i_lock);
+ nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
+ nfs_update_delegated_mtime_locked(dreq->inode);
+ spin_unlock(&inode->i_lock);
+
while (!list_empty(&hdr->pages)) {
req = nfs_list_entry(hdr->pages.next);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index df9669d4ded7..6e95db6c17e9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -133,6 +133,7 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
if (err)
return NULL;
+ label->lsmid = shim.id;
label->label = shim.context;
label->len = shim.len;
return label;
@@ -145,7 +146,7 @@ nfs4_label_release_security(struct nfs4_label *label)
if (label) {
shim.context = label->label;
shim.len = label->len;
- shim.id = LSM_ID_UNDEF;
+ shim.id = label->lsmid;
security_release_secctx(&shim);
}
}
@@ -3906,8 +3907,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
{
+ struct dentry *dentry = ctx->dentry;
if (ctx->state == NULL)
return;
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ nfs4_inode_set_return_delegation_on_close(d_inode(dentry));
if (is_sync)
nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx));
else
@@ -6269,7 +6273,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
size_t buflen)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_label label = {0, 0, buflen, buf};
+ struct nfs4_label label = {0, 0, 0, buflen, buf};
u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
struct nfs_fattr fattr = {
@@ -6374,7 +6378,7 @@ static int nfs4_do_set_security_label(struct inode *inode,
static int
nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
{
- struct nfs4_label ilabel = {0, 0, buflen, (char *)buf };
+ struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf };
struct nfs_fattr *fattr;
int status;
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 0e552d873eaa..fb9b1656a287 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -446,11 +446,20 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
struct nfsd_file, nf_gc);
struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+ struct svc_serv *serv;
spin_lock(&l->lock);
list_move_tail(&nf->nf_gc, &l->freeme);
spin_unlock(&l->lock);
- svc_wake_up(nn->nfsd_serv);
+
+ /*
+ * The filecache laundrette is shut down after the
+ * nn->nfsd_serv pointer is cleared, but before the
+ * svc_serv is freed.
+ */
+ serv = nn->nfsd_serv;
+ if (serv)
+ svc_wake_up(serv);
}
}
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3be7201b1c..5fb202acb0fd 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -84,6 +84,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e34e98db969..7b5433bd3019 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -76,6 +76,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 50e468bdb8d4..484077200c5d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
return status;
status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
- if (unlikely(status || cb->cb_seq_status))
+ if (unlikely(status || cb->cb_status))
return status;
if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
return -NFSERR_BAD_XDR;
@@ -1583,8 +1583,11 @@ nfsd4_run_cb_work(struct work_struct *work)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
- if (!clnt) {
- /* Callback channel broken, or client killed; give up: */
+ if (!clnt || clp->cl_state == NFSD4_COURTESY) {
+ /*
+ * Callback channel broken, client killed or
+ * nfs4_client in courtesy state; give up.
+ */
nfsd41_destroy_cb(cb);
return;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b7a0cfd05401..153eeea2c7c9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4459,10 +4459,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
} while (slot && --cnt > 0);
}
+
+out:
seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
seq->target_maxslots = session->se_target_maxslots;
-out:
switch (clp->cl_cb_state) {
case NFSD4_CB_DOWN:
seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 32019751a41e..aef474f1b84b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -380,8 +380,9 @@ __fh_verify(struct svc_rqst *rqstp,
error = check_nfsd_access(exp, rqstp, may_bypass_gss);
if (error)
goto out;
-
- svc_xprt_set_valid(rqstp->rq_xprt);
+ /* During LOCALIO call to fh_verify will be called with a NULL rqstp */
+ if (rqstp)
+ svc_xprt_set_valid(rqstp->rq_xprt);
/* Finally, check access permissions. */
error = nfsd_permission(cred, exp, dentry, access);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 8ee495a58d0a..fae1b6d397ea 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(fsnotify);
* Later, fsnotify permission hooks do not check if there are permission event
* watches, but that there were permission event watches at open time.
*/
-void file_set_fsnotify_mode(struct file *file)
+void file_set_fsnotify_mode_from_watchers(struct file *file)
{
struct dentry *dentry = file->f_path.dentry, *parent;
struct super_block *sb = dentry->d_sb;
@@ -665,7 +665,7 @@ void file_set_fsnotify_mode(struct file *file)
*/
if (likely(!fsnotify_sb_has_priority_watchers(sb,
FSNOTIFY_PRIO_CONTENT))) {
- file->f_mode |= FMODE_NONOTIFY_PERM;
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
return;
}
@@ -676,7 +676,7 @@ void file_set_fsnotify_mode(struct file *file)
if ((!d_is_dir(dentry) && !d_is_reg(dentry)) ||
likely(!fsnotify_sb_has_priority_watchers(sb,
FSNOTIFY_PRIO_PRE_CONTENT))) {
- file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
return;
}
@@ -686,19 +686,25 @@ void file_set_fsnotify_mode(struct file *file)
*/
mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask,
- FSNOTIFY_PRE_CONTENT_EVENTS)))
+ FSNOTIFY_PRE_CONTENT_EVENTS))) {
+ /* Enable pre-content events */
+ file_set_fsnotify_mode(file, 0);
return;
+ }
/* Is parent watching for pre-content events on this file? */
if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
parent = dget_parent(dentry);
p_mask = fsnotify_inode_watches_children(d_inode(parent));
dput(parent);
- if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)
+ if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) {
+ /* Enable pre-content events */
+ file_set_fsnotify_mode(file, 0);
return;
+ }
}
/* Nobody watching for pre-content events from this file */
- file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
}
#endif
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 663f8656158d..f7fddf8ecf73 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -37,7 +37,6 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
}
const struct dentry_operations ns_dentry_operations = {
- .d_delete = always_delete_dentry,
.d_dname = ns_dname,
.d_prune = stashed_dentry_prune,
};
diff --git a/fs/open.c b/fs/open.c
index 932e5a6de63b..1be20de9f283 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -905,7 +905,8 @@ static int do_dentry_open(struct file *f,
f->f_sb_err = file_sample_sb_err(f);
if (unlikely(f->f_flags & O_PATH)) {
- f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY;
+ f->f_mode = FMODE_PATH | FMODE_OPENED;
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY);
f->f_op = &empty_fops;
return 0;
}
@@ -935,10 +936,10 @@ static int do_dentry_open(struct file *f,
/*
* Set FMODE_NONOTIFY_* bits according to existing permission watches.
- * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't
- * change anything.
+ * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a
+ * pseudo file, this call will not change the mode.
*/
- file_set_fsnotify_mode(f);
+ file_set_fsnotify_mode_from_watchers(f);
error = fsnotify_open_perm(f);
if (error)
goto cleanup_all;
@@ -1122,7 +1123,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags,
if (!IS_ERR(f)) {
int error;
- f->f_mode |= FMODE_NONOTIFY;
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY);
error = vfs_open(path, f);
if (error) {
fput(f);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 0c28e5fa3407..d7310fcf3888 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -618,7 +618,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper);
- dput(upper);
if (!err) {
/* Restore timestamps on parent (best effort) */
@@ -626,6 +625,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
ovl_dentry_set_upper_alias(c->dentry);
ovl_dentry_update_reval(c->dentry, upper);
}
+ dput(upper);
}
inode_unlock(udir);
if (err)
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 049352f973de..c0478b3c55d9 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
switch (cmd) {
case FS_IOC_GETVERSION:
case PIDFD_GET_CGROUP_NAMESPACE:
- case PIDFD_GET_INFO:
case PIDFD_GET_IPC_NAMESPACE:
case PIDFD_GET_MNT_NAMESPACE:
case PIDFD_GET_NET_NAMESPACE:
@@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
return true;
}
+ /* Extensible ioctls require some more careful checks. */
+ switch (_IOC_NR(cmd)) {
+ case _IOC_NR(PIDFD_GET_INFO):
+ /*
+ * Try to prevent performing a pidfd ioctl when someone
+ * erronously mistook the file descriptor for a pidfd.
+ * This is not perfect but will catch most cases.
+ */
+ return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO));
+ }
+
return false;
}
@@ -511,7 +521,6 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen)
}
const struct dentry_operations pidfs_dentry_operations = {
- .d_delete = always_delete_dentry,
.d_dname = pidfs_dname,
.d_prune = stashed_dentry_prune,
};
diff --git a/fs/pipe.c b/fs/pipe.c
index 94b59045ab44..ce1af7592780 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -960,6 +960,12 @@ int create_pipe_files(struct file **res, int flags)
res[1] = f;
stream_open(inode, res[0]);
stream_open(inode, res[1]);
+ /*
+ * Disable permission and pre-content events, but enable legacy
+ * inotify events for legacy users.
+ */
+ file_set_fsnotify_mode(res[0], FMODE_NONOTIFY_PERM);
+ file_set_fsnotify_mode(res[1], FMODE_NONOTIFY_PERM);
return 0;
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index a00120a3c099..10d01eb09c43 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1524,7 +1524,7 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
pr_warn_once("Unexpected adding of device dump\n");
if (vmcore_open) {
ret = -EBUSY;
- goto out_err;
+ goto unlock;
}
list_add_tail(&dump->list, &vmcoredd_list);
@@ -1532,6 +1532,9 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
mutex_unlock(&vmcore_mutex);
return 0;
+unlock:
+ mutex_unlock(&vmcore_mutex);
+
out_err:
vfree(buf);
vfree(dump);
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index a68434ad744a..cddeb2adbf4a 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -253,6 +253,7 @@ struct cifs_cred {
struct cifs_open_info_data {
bool adjust_tz;
bool reparse_point;
+ bool contains_posix_file_info;
struct {
/* ioctl response buffer */
struct {
@@ -357,7 +358,7 @@ struct smb_version_operations {
int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *);
void (*downgrade_oplock)(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache);
+ __u16 epoch, bool *purge_cache);
/* process transaction2 response */
bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
char *, int);
@@ -552,12 +553,12 @@ struct smb_version_operations {
/* if we can do cache read operations */
bool (*is_read_op)(__u32);
/* set oplock level for the inode */
- void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int,
- bool *);
+ void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch,
+ bool *purge_cache);
/* create lease context buffer for CREATE request */
char * (*create_lease_buf)(u8 *lease_key, u8 oplock);
/* parse lease context buffer and return oplock/epoch info */
- __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
+ __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey);
ssize_t (*copychunk_range)(const unsigned int,
struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file,
@@ -1447,7 +1448,7 @@ struct cifs_fid {
__u8 create_guid[16];
__u32 access;
struct cifs_pending_open *pending_open;
- unsigned int epoch;
+ __u16 epoch;
#ifdef CONFIG_CIFS_DEBUG2
__u64 mid;
#endif /* CIFS_DEBUG2 */
@@ -1480,7 +1481,7 @@ struct cifsFileInfo {
bool oplock_break_cancelled:1;
bool status_file_deleted:1; /* file has been deleted */
bool offload:1; /* offload final part of _put to a wq */
- unsigned int oplock_epoch; /* epoch from the lease break */
+ __u16 oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
spinlock_t file_info_lock; /* protects four flag/count fields above */
@@ -1508,7 +1509,6 @@ struct cifs_io_parms {
struct cifs_io_request {
struct netfs_io_request rreq;
struct cifsFileInfo *cfile;
- struct TCP_Server_Info *server;
pid_t pid;
};
@@ -1577,7 +1577,7 @@ struct cifsInodeInfo {
spinlock_t open_file_lock; /* protects openFileList */
__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
unsigned int oplock; /* oplock/lease level we have */
- unsigned int epoch; /* used to track lease state changes */
+ __u16 epoch; /* used to track lease state changes */
#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */
#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */
#define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */
@@ -2325,8 +2325,8 @@ struct smb2_compound_vars {
struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
struct kvec close_iov;
- struct smb2_file_rename_info rename_info;
- struct smb2_file_link_info link_info;
+ struct smb2_file_rename_info_hdr rename_info;
+ struct smb2_file_link_info_hdr link_info;
struct kvec ea_iov;
};
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index dad521336b5e..f65a8a90ba27 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -150,25 +150,27 @@ again:
if (rc)
continue;
- if (tgt.flags & DFSREF_STORAGE_SERVER) {
- rc = cifs_mount_get_tcon(mnt_ctx);
- if (!rc)
- rc = cifs_is_path_remote(mnt_ctx);
+ rc = cifs_mount_get_tcon(mnt_ctx);
+ if (rc) {
+ if (tgt.server_type == DFS_TYPE_LINK &&
+ DFS_INTERLINK(tgt.flags))
+ rc = -EREMOTE;
+ } else {
+ rc = cifs_is_path_remote(mnt_ctx);
if (!rc) {
ref_walk_set_tgt_hint(rw);
break;
}
- if (rc != -EREMOTE)
- continue;
}
-
- rc = ref_walk_advance(rw);
- if (!rc) {
- rc = setup_dfs_ref(&tgt, rw);
- if (rc)
- break;
- ref_walk_mark_end(rw);
- goto again;
+ if (rc == -EREMOTE) {
+ rc = ref_walk_advance(rw);
+ if (!rc) {
+ rc = setup_dfs_ref(&tgt, rw);
+ if (rc)
+ break;
+ ref_walk_mark_end(rw);
+ goto again;
+ }
}
}
} while (rc && ref_walk_descend(rw));
diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h
index ed4cd7cf1ec6..e60f0a24a8a1 100644
--- a/fs/smb/client/dfs.h
+++ b/fs/smb/client/dfs.h
@@ -188,4 +188,11 @@ static inline void dfs_put_root_smb_sessions(struct list_head *head)
}
}
+static inline const char *dfs_ses_refpath(struct cifs_ses *ses)
+{
+ const char *path = ses->server->leaf_fullpath;
+
+ return path ? path + 1 : ERR_PTR(-ENOENT);
+}
+
#endif /* _CIFS_DFS_H */
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 5022bb1f122a..4dada26d56b5 100644
--- a/fs/smb/client/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
@@ -1136,33 +1136,19 @@ static bool is_ses_good(struct cifs_ses *ses)
return ret;
}
-static char *get_ses_refpath(struct cifs_ses *ses)
-{
- struct TCP_Server_Info *server = ses->server;
- char *path = ERR_PTR(-ENOENT);
-
- if (server->leaf_fullpath) {
- path = kstrdup(server->leaf_fullpath + 1, GFP_KERNEL);
- if (!path)
- path = ERR_PTR(-ENOMEM);
- }
- return path;
-}
-
/* Refresh dfs referral of @ses */
static void refresh_ses_referral(struct cifs_ses *ses)
{
struct cache_entry *ce;
unsigned int xid;
- char *path;
+ const char *path;
int rc = 0;
xid = get_xid();
- path = get_ses_refpath(ses);
+ path = dfs_ses_refpath(ses);
if (IS_ERR(path)) {
rc = PTR_ERR(path);
- path = NULL;
goto out;
}
@@ -1181,7 +1167,6 @@ static void refresh_ses_referral(struct cifs_ses *ses)
out:
free_xid(xid);
- kfree(path);
}
static int __refresh_tcon_referral(struct cifs_tcon *tcon,
@@ -1231,19 +1216,18 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh)
struct dfs_info3_param *refs = NULL;
struct cache_entry *ce;
struct cifs_ses *ses;
- unsigned int xid;
bool needs_refresh;
- char *path;
+ const char *path;
+ unsigned int xid;
int numrefs = 0;
int rc = 0;
xid = get_xid();
ses = tcon->ses;
- path = get_ses_refpath(ses);
+ path = dfs_ses_refpath(ses);
if (IS_ERR(path)) {
rc = PTR_ERR(path);
- path = NULL;
goto out;
}
@@ -1271,7 +1255,6 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh)
out:
free_xid(xid);
- kfree(path);
free_dfs_info_array(refs, numrefs);
}
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 79de2f2f9c41..8582cf61242c 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -147,7 +147,7 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq)
struct netfs_io_request *rreq = subreq->rreq;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
- struct TCP_Server_Info *server = req->server;
+ struct TCP_Server_Info *server;
struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
size_t size;
int rc = 0;
@@ -156,6 +156,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq)
rdata->xid = get_xid();
rdata->have_xid = true;
}
+
+ server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
rdata->server = server;
if (cifs_sb->ctx->rsize == 0)
@@ -198,7 +200,7 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
struct netfs_io_request *rreq = subreq->rreq;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
- struct TCP_Server_Info *server = req->server;
+ struct TCP_Server_Info *server = rdata->server;
int rc = 0;
cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
@@ -266,7 +268,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
open_file = file->private_data;
rreq->netfs_priv = file->private_data;
req->cfile = cifsFileInfo_get(open_file);
- req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
req->pid = req->cfile->pid;
} else if (rreq->origin != NETFS_WRITEBACK) {
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 9cc31cf6ebd0..616149c7f0a5 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1215,6 +1215,19 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
rc = server->ops->parse_reparse_point(cifs_sb,
full_path,
iov, data);
+ /*
+ * If the reparse point was not handled but it is the
+ * name surrogate which points to directory, then treat
+ * is as a new mount point. Name surrogate reparse point
+ * represents another named entity in the system.
+ */
+ if (rc == -EOPNOTSUPP &&
+ IS_REPARSE_TAG_NAME_SURROGATE(data->reparse.tag) &&
+ (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY)) {
+ rc = 0;
+ cifs_create_junction_fattr(fattr, sb);
+ goto out;
+ }
}
if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) {
@@ -1408,7 +1421,7 @@ int cifs_get_inode_info(struct inode **inode,
struct cifs_fattr fattr = {};
int rc;
- if (is_inode_cache_good(*inode)) {
+ if (!data && is_inode_cache_good(*inode)) {
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
return 0;
}
@@ -1507,7 +1520,7 @@ int smb311_posix_get_inode_info(struct inode **inode,
struct cifs_fattr fattr = {};
int rc;
- if (is_inode_cache_good(*inode)) {
+ if (!data && is_inode_cache_good(*inode)) {
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
return 0;
}
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 0a5a52a8a7dd..2b9e9885dc42 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -1088,13 +1088,12 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
le32_to_cpu(buf->ReparseTag));
return -EIO;
}
- break;
+ return 0;
default:
cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n",
le32_to_cpu(buf->ReparseTag));
- break;
+ return -EOPNOTSUPP;
}
- return 0;
}
int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h
index 5a753fec7e2c..c0be5ab45a78 100644
--- a/fs/smb/client/reparse.h
+++ b/fs/smb/client/reparse.h
@@ -99,14 +99,30 @@ static inline bool reparse_inode_match(struct inode *inode,
static inline bool cifs_open_data_reparse(struct cifs_open_info_data *data)
{
- struct smb2_file_all_info *fi = &data->fi;
- u32 attrs = le32_to_cpu(fi->Attributes);
+ u32 attrs;
bool ret;
- ret = data->reparse_point || (attrs & ATTR_REPARSE);
- if (ret)
- attrs |= ATTR_REPARSE;
- fi->Attributes = cpu_to_le32(attrs);
+ if (data->contains_posix_file_info) {
+ struct smb311_posix_qinfo *fi = &data->posix_fi;
+
+ attrs = le32_to_cpu(fi->DosAttributes);
+ if (data->reparse_point) {
+ attrs |= ATTR_REPARSE;
+ fi->DosAttributes = cpu_to_le32(attrs);
+ }
+
+ } else {
+ struct smb2_file_all_info *fi = &data->fi;
+
+ attrs = le32_to_cpu(fi->Attributes);
+ if (data->reparse_point) {
+ attrs |= ATTR_REPARSE;
+ fi->Attributes = cpu_to_le32(attrs);
+ }
+ }
+
+ ret = attrs & ATTR_REPARSE;
+
return ret;
}
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 9756b876a75e..d6e2fb669c40 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -377,7 +377,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
static void
cifs_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
cifs_set_oplock_level(cinode, oplock);
}
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 5dfb30b0a852..826b57a5a2a8 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -650,6 +650,7 @@ finished:
switch (cmds[i]) {
case SMB2_OP_QUERY_INFO:
idata = in_iov[i].iov_base;
+ idata->contains_posix_file_info = false;
if (rc == 0 && cfile && cfile->symlink_target) {
idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
if (!idata->symlink_target)
@@ -673,6 +674,7 @@ finished:
break;
case SMB2_OP_POSIX_QUERY_INFO:
idata = in_iov[i].iov_base;
+ idata->contains_posix_file_info = true;
if (rc == 0 && cfile && cfile->symlink_target) {
idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
if (!idata->symlink_target)
@@ -770,6 +772,7 @@ finished:
idata = in_iov[i].iov_base;
idata->reparse.io.iov = *iov;
idata->reparse.io.buftype = resp_buftype[i + 1];
+ idata->contains_posix_file_info = false; /* BB VERIFY */
rbuf = reparse_buf_ptr(iov);
if (IS_ERR(rbuf)) {
rc = PTR_ERR(rbuf);
@@ -791,6 +794,7 @@ finished:
case SMB2_OP_QUERY_WSL_EA:
if (!rc) {
idata = in_iov[i].iov_base;
+ idata->contains_posix_file_info = false;
qi_rsp = rsp_iov[i + 1].iov_base;
data[0] = (u8 *)qi_rsp + le16_to_cpu(qi_rsp->OutputBufferOffset);
size[0] = le32_to_cpu(qi_rsp->OutputBufferLength);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 77309217dab4..4dd11eafb69d 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -1001,6 +1001,7 @@ static int smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
if (!data->symlink_target)
return -ENOMEM;
}
+ data->contains_posix_file_info = false;
return SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, &data->fi);
}
@@ -3904,22 +3905,22 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
static void
smb2_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
server->ops->set_oplock_level(cinode, oplock, 0, NULL);
}
static void
smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache);
+ __u16 epoch, bool *purge_cache);
static void
smb3_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
unsigned int old_state = cinode->oplock;
- unsigned int old_epoch = cinode->epoch;
+ __u16 old_epoch = cinode->epoch;
unsigned int new_state;
if (epoch > old_epoch) {
@@ -3939,7 +3940,7 @@ smb3_downgrade_oplock(struct TCP_Server_Info *server,
static void
smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
oplock &= 0xFF;
cinode->lease_granted = false;
@@ -3963,7 +3964,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
static void
smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
char message[5] = {0};
unsigned int new_oplock = 0;
@@ -4000,7 +4001,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
static void
smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
unsigned int old_oplock = cinode->oplock;
@@ -4114,7 +4115,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
}
static __u8
-smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
+smb2_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key)
{
struct create_lease *lc = (struct create_lease *)buf;
@@ -4125,7 +4126,7 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
}
static __u8
-smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
+smb3_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key)
{
struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
@@ -4964,6 +4965,10 @@ one_more:
next_buffer = (char *)cifs_buf_get();
else
next_buffer = (char *)cifs_small_buf_get();
+ if (!next_buffer) {
+ cifs_server_dbg(VFS, "No memory for (large) SMB response\n");
+ return -1;
+ }
memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd);
}
@@ -5146,7 +5151,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
FILE_CREATE, CREATE_NOT_DIR |
CREATE_OPTION_SPECIAL, ACL_NO_MODE);
oparms.fid = &fid;
-
+ idata.contains_posix_file_info = false;
rc = server->ops->open(xid, &oparms, &oplock, &idata);
if (rc)
goto out;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 40ad9e79437a..ed7812247ebc 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -2169,7 +2169,7 @@ tcon_exit:
tcon_error_exit:
if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME)
- cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
+ cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree);
goto tcon_exit;
}
@@ -2329,7 +2329,7 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info,
int smb2_parse_contexts(struct TCP_Server_Info *server,
struct kvec *rsp_iov,
- unsigned int *epoch,
+ __u16 *epoch,
char *lease_key, __u8 *oplock,
struct smb2_file_all_info *buf,
struct create_posix_rsp *posix)
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 2336dfb23f36..4662c7e2d259 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -283,7 +283,7 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
int smb2_parse_contexts(struct TCP_Server_Info *server,
struct kvec *rsp_iov,
- unsigned int *epoch,
+ __u16 *epoch,
char *lease_key, __u8 *oplock,
struct smb2_file_all_info *buf,
struct create_posix_rsp *posix);
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 3336df2ea5d4..c7a0efda4403 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1707,23 +1707,33 @@ struct smb2_file_internal_info {
} __packed; /* level 6 Query */
struct smb2_file_rename_info { /* encoding of request for level 10 */
- __u8 ReplaceIfExists; /* 1 = replace existing target with new */
- /* 0 = fail if target already exists */
- __u8 Reserved[7];
- __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
- __le32 FileNameLength;
+ /* New members MUST be added within the struct_group() macro below. */
+ __struct_group(smb2_file_rename_info_hdr, __hdr, __packed,
+ __u8 ReplaceIfExists; /* 1 = replace existing target with new */
+ /* 0 = fail if target already exists */
+ __u8 Reserved[7];
+ __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
+ __le32 FileNameLength;
+ );
char FileName[]; /* New name to be assigned */
/* padding - overall struct size must be >= 24 so filename + pad >= 6 */
} __packed; /* level 10 Set */
+static_assert(offsetof(struct smb2_file_rename_info, FileName) == sizeof(struct smb2_file_rename_info_hdr),
+ "struct member likely outside of __struct_group()");
struct smb2_file_link_info { /* encoding of request for level 11 */
- __u8 ReplaceIfExists; /* 1 = replace existing link with new */
- /* 0 = fail if link already exists */
- __u8 Reserved[7];
- __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
- __le32 FileNameLength;
+ /* New members MUST be added within the struct_group() macro below. */
+ __struct_group(smb2_file_link_info_hdr, __hdr, __packed,
+ __u8 ReplaceIfExists; /* 1 = replace existing link with new */
+ /* 0 = fail if link already exists */
+ __u8 Reserved[7];
+ __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
+ __le32 FileNameLength;
+ );
char FileName[]; /* Name to be assigned to new link */
} __packed; /* level 11 Set */
+static_assert(offsetof(struct smb2_file_link_info, FileName) == sizeof(struct smb2_file_link_info_hdr),
+ "struct member likely outside of __struct_group()");
/*
* This level 18, although with struct with same name is different from cifs
diff --git a/fs/smb/common/smbfsctl.h b/fs/smb/common/smbfsctl.h
index 4b379e84c46b..3253a18ecb5c 100644
--- a/fs/smb/common/smbfsctl.h
+++ b/fs/smb/common/smbfsctl.h
@@ -159,6 +159,9 @@
#define IO_REPARSE_TAG_LX_CHR 0x80000025
#define IO_REPARSE_TAG_LX_BLK 0x80000026
+/* If Name Surrogate Bit is set, the file or directory represents another named entity in the system. */
+#define IS_REPARSE_TAG_NAME_SURROGATE(tag) (!!((tag) & 0x20000000))
+
/* fsctl flags */
/* If Flags is set to this value, the request is an FSCTL not ioctl request */
#define SMB2_0_IOCTL_IS_FSCTL 0x00000001
diff --git a/fs/stat.c b/fs/stat.c
index 2c0e111a098a..f13308bfdc98 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -281,6 +281,8 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
u32 request_mask)
{
int error = vfs_getattr(path, stat, request_mask, flags);
+ if (error)
+ return error;
if (request_mask & STATX_MNT_ID_UNIQUE) {
stat->mnt_id = real_mount(path->mnt)->mnt_id_unique;
@@ -302,7 +304,7 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
if (S_ISBLK(stat->mode))
bdev_statx(path, stat, request_mask);
- return error;
+ return 0;
}
static int vfs_statx_fd(int fd, int flags, struct kstat *stat,
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index e95b8a48d8a0..1d94bb784108 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -21,7 +21,8 @@
#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
-static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375";
+static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376',
+ '\375' };
static int follow_symlinks;
module_param(follow_symlinks, int, 0444);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 40ad22fb808b..0ef19f1469ec 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof(
int error;
/*
- * If there are already extents in the file, try an exact EOF block
- * allocation to extend the file as a contiguous extent. If that fails,
- * or it's the first allocation in a file, just try for a stripe aligned
- * allocation.
+ * If there are already extents in the file, and xfs_bmap_adjacent() has
+ * given a better blkno, try an exact EOF block allocation to extend the
+ * file as a contiguous extent. If that fails, or it's the first
+ * allocation in a file, just try for a stripe aligned allocation.
*/
- if (ap->offset) {
+ if (ap->eof) {
xfs_extlen_t nextminlen = 0;
/*
@@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length(
int error;
ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
- xfs_bmap_adjacent(ap);
+ if (!xfs_bmap_adjacent(ap))
+ ap->eof = false;
/*
* Search for an allocation group with a single extent large enough for
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index bdcd40f0ec74..19877d99f255 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -224,7 +224,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
bool xchk_dir_looks_zapped(struct xfs_inode *dp);
bool xchk_pptr_looks_zapped(struct xfs_inode *ip);
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
{
@@ -244,10 +243,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED);
}
-#else
-# define xchk_needs_repair(sc) (false)
-# define xchk_could_repair(sc) (false)
-#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 2f641b6d663e..13ff1c933cb8 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -1055,9 +1055,17 @@ xrep_dinode_check_dfork(
return true;
break;
case S_IFREG:
- if (fmt == XFS_DINODE_FMT_LOCAL)
+ switch (fmt) {
+ case XFS_DINODE_FMT_LOCAL:
return true;
- fallthrough;
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ case XFS_DINODE_FMT_META_BTREE:
+ break;
+ default:
+ return true;
+ }
+ break;
case S_IFLNK:
case S_IFDIR:
switch (fmt) {
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 823c00d1a502..af0a3a9e5ed9 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -191,7 +191,16 @@ int xrep_reset_metafile_resv(struct xfs_scrub *sc);
#else
#define xrep_ino_dqattach(sc) (0)
-#define xrep_will_attempt(sc) (false)
+
+/*
+ * When online repair is not built into the kernel, we still want to attempt
+ * the repair so that the stub xrep_attempt below will return EOPNOTSUPP.
+ */
+static inline bool xrep_will_attempt(const struct xfs_scrub *sc)
+{
+ return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ xchk_needs_repair(sc->sm);
+}
static inline int
xrep_attempt(
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 7567dd5cad14..6fa9e3e5bab7 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -149,6 +149,18 @@ xchk_probe(
if (xchk_should_terminate(sc, &error))
return error;
+ /*
+ * If the caller is probing to see if repair works but repair isn't
+ * built into the kernel, return EOPNOTSUPP because that's the signal
+ * that userspace expects. If online repair is built in, set the
+ * CORRUPT flag (without any of the usual tracing/logging) to force us
+ * into xrep_probe.
+ */
+ if (xchk_could_repair(sc)) {
+ if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR))
+ return -EOPNOTSUPP;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
return 0;
}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 67877c36ed11..6d9965b546cb 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -19,6 +19,7 @@
#include "xfs_reflink.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
+#include "xfs_icache.h"
struct xfs_writepage_ctx {
struct iomap_writepage_ctx ctx;
@@ -528,12 +529,44 @@ xfs_vm_readahead(
}
static int
-xfs_iomap_swapfile_activate(
+xfs_vm_swap_activate(
struct swap_info_struct *sis,
struct file *swap_file,
sector_t *span)
{
- sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+ struct xfs_inode *ip = XFS_I(file_inode(swap_file));
+
+ /*
+ * Swap file activation can race against concurrent shared extent
+ * removal in files that have been cloned. If this happens,
+ * iomap_swapfile_iter() can fail because it encountered a shared
+ * extent even though an operation is in progress to remove those
+ * shared extents.
+ *
+ * This race becomes problematic when we defer extent removal
+ * operations beyond the end of a syscall (i.e. use async background
+ * processing algorithms). Users think the extents are no longer
+ * shared, but iomap_swapfile_iter() still sees them as shared
+ * because the refcountbt entries for the extents being removed have
+ * not yet been updated. Hence the swapon call fails unexpectedly.
+ *
+ * The race condition is currently most obvious from the unlink()
+ * operation as extent removal is deferred until after the last
+ * reference to the inode goes away. We then process the extent
+ * removal asynchronously, hence triggers the "syscall completed but
+ * work not done" condition mentioned above. To close this race
+ * window, we need to flush any pending inodegc operations to ensure
+ * they have updated the refcountbt records before we try to map the
+ * swapfile.
+ */
+ xfs_inodegc_flush(ip->i_mount);
+
+ /*
+ * Direct the swap code to the correct block device when this file
+ * sits on the RT device.
+ */
+ sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
+
return iomap_swapfile_activate(sis, swap_file, span,
&xfs_read_iomap_ops);
}
@@ -549,11 +582,11 @@ const struct address_space_operations xfs_address_space_operations = {
.migrate_folio = filemap_migrate_folio,
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
- .swap_activate = xfs_iomap_swapfile_activate,
+ .swap_activate = xfs_vm_swap_activate,
};
const struct address_space_operations xfs_dax_aops = {
.writepages = xfs_dax_writepages,
.dirty_folio = noop_dirty_folio,
- .swap_activate = xfs_iomap_swapfile_activate,
+ .swap_activate = xfs_vm_swap_activate,
};
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1d4a0a22e13..15bb790359f8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
*
* xfs_buf_rele:
* b_lock
- * pag_buf_lock
- * lru_lock
+ * lru_lock
*
* xfs_buftarg_drain_rele
* lru_lock
@@ -220,23 +219,25 @@ _xfs_buf_alloc(
*/
flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
- spin_lock_init(&bp->b_lock);
+ /*
+ * A new buffer is held and locked by the owner. This ensures that the
+ * buffer is owned by the caller and racing RCU lookups right after
+ * inserting into the hash table are safe (and will have to wait for
+ * the unlock to do anything non-trivial).
+ */
bp->b_hold = 1;
+ sema_init(&bp->b_sema, 0); /* held, no waiters */
+
+ spin_lock_init(&bp->b_lock);
atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
INIT_LIST_HEAD(&bp->b_li_list);
- sema_init(&bp->b_sema, 0); /* held, no waiters */
bp->b_target = target;
bp->b_mount = target->bt_mount;
bp->b_flags = flags;
- /*
- * Set length and io_length to the same value initially.
- * I/O routines should use io_length, which will be the same in
- * most cases but may be reset (e.g. XFS recovery).
- */
error = xfs_buf_get_maps(bp, nmaps);
if (error) {
kmem_cache_free(xfs_buf_cache, bp);
@@ -502,7 +503,6 @@ int
xfs_buf_cache_init(
struct xfs_buf_cache *bch)
{
- spin_lock_init(&bch->bc_lock);
return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
}
@@ -652,17 +652,20 @@ xfs_buf_find_insert(
if (error)
goto out_free_buf;
- spin_lock(&bch->bc_lock);
+ /* The new buffer keeps the perag reference until it is freed. */
+ new_bp->b_pag = pag;
+
+ rcu_read_lock();
bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
&new_bp->b_rhash_head, xfs_buf_hash_params);
if (IS_ERR(bp)) {
+ rcu_read_unlock();
error = PTR_ERR(bp);
- spin_unlock(&bch->bc_lock);
goto out_free_buf;
}
if (bp && xfs_buf_try_hold(bp)) {
/* found an existing buffer */
- spin_unlock(&bch->bc_lock);
+ rcu_read_unlock();
error = xfs_buf_find_lock(bp, flags);
if (error)
xfs_buf_rele(bp);
@@ -670,10 +673,8 @@ xfs_buf_find_insert(
*bpp = bp;
goto out_free_buf;
}
+ rcu_read_unlock();
- /* The new buffer keeps the perag reference until it is freed. */
- new_bp->b_pag = pag;
- spin_unlock(&bch->bc_lock);
*bpp = new_bp;
return 0;
@@ -1090,7 +1091,6 @@ xfs_buf_rele_cached(
}
/* we are asked to drop the last reference */
- spin_lock(&bch->bc_lock);
__xfs_buf_ioacct_dec(bp);
if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
@@ -1102,7 +1102,6 @@ xfs_buf_rele_cached(
bp->b_state &= ~XFS_BSTATE_DISPOSE;
else
bp->b_hold--;
- spin_unlock(&bch->bc_lock);
} else {
bp->b_hold--;
/*
@@ -1120,7 +1119,6 @@ xfs_buf_rele_cached(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
xfs_buf_hash_params);
- spin_unlock(&bch->bc_lock);
if (pag)
xfs_perag_put(pag);
freebuf = true;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7e73663c5d4a..3b4ed42e11c0 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
struct xfs_buf_cache {
- spinlock_t bc_lock;
struct rhashtable bc_hash;
};
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index f340a2015c4c..0b41bdfecdfb 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -329,22 +329,6 @@ out_trans_cancel:
* successfully but before locks are dropped.
*/
-/* Verify that we have security clearance to perform this operation. */
-static int
-xfs_exchange_range_verify_area(
- struct xfs_exchrange *fxr)
-{
- int ret;
-
- ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
- true);
- if (ret)
- return ret;
-
- return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
- true);
-}
-
/*
* Performs necessary checks before doing a range exchange, having stabilized
* mutable inode attributes via i_rwsem.
@@ -355,11 +339,13 @@ xfs_exchange_range_checks(
unsigned int alloc_unit)
{
struct inode *inode1 = file_inode(fxr->file1);
+ loff_t size1 = i_size_read(inode1);
struct inode *inode2 = file_inode(fxr->file2);
+ loff_t size2 = i_size_read(inode2);
uint64_t allocmask = alloc_unit - 1;
int64_t test_len;
uint64_t blen;
- loff_t size1, size2, tmp;
+ loff_t tmp;
int error;
/* Don't touch certain kinds of inodes */
@@ -368,24 +354,25 @@ xfs_exchange_range_checks(
if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
return -ETXTBSY;
- size1 = i_size_read(inode1);
- size2 = i_size_read(inode2);
-
/* Ranges cannot start after EOF. */
if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
return -EINVAL;
- /*
- * If the caller said to exchange to EOF, we set the length of the
- * request large enough to cover everything to the end of both files.
- */
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
+ /*
+ * If the caller said to exchange to EOF, we set the length of
+ * the request large enough to cover everything to the end of
+ * both files.
+ */
fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
size2 - fxr->file2_offset);
-
- error = xfs_exchange_range_verify_area(fxr);
- if (error)
- return error;
+ } else {
+ /*
+ * Otherwise we require both ranges to end within EOF.
+ */
+ if (fxr->file1_offset + fxr->length > size1 ||
+ fxr->file2_offset + fxr->length > size2)
+ return -EINVAL;
}
/*
@@ -402,15 +389,6 @@ xfs_exchange_range_checks(
return -EINVAL;
/*
- * We require both ranges to end within EOF, unless we're exchanging
- * to EOF.
- */
- if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
- (fxr->file1_offset + fxr->length > size1 ||
- fxr->file2_offset + fxr->length > size2))
- return -EINVAL;
-
- /*
* Make sure we don't hit any file size limits. If we hit any size
* limits such that test_length was adjusted, we abort the whole
* operation.
@@ -747,6 +725,7 @@ xfs_exchange_range(
{
struct inode *inode1 = file_inode(fxr->file1);
struct inode *inode2 = file_inode(fxr->file2);
+ loff_t check_len = fxr->length;
int ret;
BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
@@ -779,14 +758,18 @@ xfs_exchange_range(
return -EBADF;
/*
- * If we're not exchanging to EOF, we can check the areas before
- * stabilizing both files' i_size.
+ * If we're exchanging to EOF we can't calculate the length until taking
+ * the iolock. Pass a 0 length to remap_verify_area similar to the
+ * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
*/
- if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
- ret = xfs_exchange_range_verify_area(fxr);
- if (ret)
- return ret;
- }
+ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
+ check_len = 0;
+ ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
+ if (ret)
+ return ret;
+ ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
+ if (ret)
+ return ret;
/* Update cmtime if the fd/inode don't forbid it. */
if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c95fe1b1de4e..b1f9f156ec88 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1404,8 +1404,11 @@ xfs_inactive(
goto out;
/* Try to clean out the cow blocks if there are any. */
- if (xfs_inode_has_cow_data(ip))
- xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+ if (xfs_inode_has_cow_data(ip)) {
+ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+ if (error)
+ goto out;
+ }
if (VFS_I(ip)->i_nlink != 0) {
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 50fa3ef89f6c..d61460309a78 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -976,10 +976,8 @@ xfs_dax_write_iomap_end(
if (!xfs_is_cow_inode(ip))
return 0;
- if (!written) {
- xfs_reflink_cancel_cow_range(ip, pos, length, true);
- return 0;
- }
+ if (!written)
+ return xfs_reflink_cancel_cow_range(ip, pos, length, true);
return xfs_reflink_end_cow(ip, pos, written);
}
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 37f1230e7584..245d754f382a 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -78,6 +78,28 @@ xfs_qm_statvfs(
}
}
+STATIC int
+xfs_qm_validate_state_change(
+ struct xfs_mount *mp,
+ uint uqd,
+ uint gqd,
+ uint pqd)
+{
+ int state;
+
+ /* Is quota state changing? */
+ state = ((uqd && !XFS_IS_UQUOTA_ON(mp)) ||
+ (!uqd && XFS_IS_UQUOTA_ON(mp)) ||
+ (gqd && !XFS_IS_GQUOTA_ON(mp)) ||
+ (!gqd && XFS_IS_GQUOTA_ON(mp)) ||
+ (pqd && !XFS_IS_PQUOTA_ON(mp)) ||
+ (!pqd && XFS_IS_PQUOTA_ON(mp)));
+
+ return state &&
+ (xfs_dev_is_read_only(mp, "changing quota state") ||
+ xfs_has_norecovery(mp));
+}
+
int
xfs_qm_newmount(
xfs_mount_t *mp,
@@ -97,24 +119,25 @@ xfs_qm_newmount(
}
/*
- * If the device itself is read-only, we can't allow
- * the user to change the state of quota on the mount -
- * this would generate a transaction on the ro device,
- * which would lead to an I/O error and shutdown
+ * If the device itself is read-only and/or in norecovery
+ * mode, we can't allow the user to change the state of
+ * quota on the mount - this would generate a transaction
+ * on the ro device, which would lead to an I/O error and
+ * shutdown.
*/
- if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
- (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) ||
- (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
- (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) ||
- (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
- (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) &&
- xfs_dev_is_read_only(mp, "changing quota state")) {
- xfs_warn(mp, "please mount with%s%s%s%s.",
- (!quotaondisk ? "out quota" : ""),
- (uquotaondisk ? " usrquota" : ""),
- (gquotaondisk ? " grpquota" : ""),
- (pquotaondisk ? " prjquota" : ""));
+ if (xfs_qm_validate_state_change(mp, uquotaondisk,
+ gquotaondisk, pquotaondisk)) {
+
+ if (xfs_has_metadir(mp))
+ xfs_warn(mp,
+ "metadir enabled, please mount without any quota mount options");
+ else
+ xfs_warn(mp, "please mount with%s%s%s%s.",
+ (!quotaondisk ? "out quota" : ""),
+ (uquotaondisk ? " usrquota" : ""),
+ (gquotaondisk ? " grpquota" : ""),
+ (pquotaondisk ? " prjquota" : ""));
return -EPERM;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d92d7a07ea89..0055066fb1d9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1661,8 +1661,12 @@ xfs_fs_fill_super(
#endif
}
- /* Filesystem claims it needs repair, so refuse the mount. */
- if (xfs_has_needsrepair(mp)) {
+ /*
+ * Filesystem claims it needs repair, so refuse the mount unless
+ * norecovery is also specified, in which case the filesystem can
+ * be mounted with no risk of further damage.
+ */
+ if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair.");
error = -EFSCORRUPTED;
goto out_free_sb;