summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent_io.c11
-rw-r--r--fs/btrfs/inode.c8
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/relocation.c19
-rw-r--r--fs/btrfs/tree-log.c19
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/dax.c3
-rw-r--r--fs/erofs/Kconfig20
-rw-r--r--fs/erofs/super.c28
-rw-r--r--fs/erofs/zdata.c13
-rw-r--r--fs/nfsd/localio.c5
-rw-r--r--fs/nfsd/vfs.c10
-rw-r--r--fs/proc/task_mmu.c24
-rw-r--r--fs/smb/client/cifs_spnego.c47
-rw-r--r--fs/smb/client/cifsfs.h4
-rw-r--r--fs/smb/client/cifsglob.h21
-rw-r--r--fs/smb/client/cifstransport.c19
-rw-r--r--fs/smb/client/compress.c71
-rw-r--r--fs/smb/client/connect.c9
-rw-r--r--fs/smb/client/inode.c34
-rw-r--r--fs/smb/client/smb2ops.c15
-rw-r--r--fs/smb/client/smb2transport.c1
-rw-r--r--fs/smb/client/smbdirect.c10
-rw-r--r--fs/smb/client/transport.c7
-rw-r--r--fs/xfs/scrub/trace.h2
-rw-r--r--fs/xfs/xfs_file.c6
-rw-r--r--fs/xfs/xfs_inode.h11
-rw-r--r--fs/xfs/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_iops.c5
-rw-r--r--fs/xfs/xfs_itable.c8
-rw-r--r--fs/xfs/xfs_itable.h10
-rw-r--r--fs/xfs/xfs_mount.c19
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_zone_alloc.c42
35 files changed, 312 insertions, 199 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 835b0deef9bb..f23d75986947 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4331,15 +4331,18 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1;
int ret;
- xa_lock_irq(&fs_info->buffer_tree);
+ rcu_read_lock();
xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) {
/*
* The same as try_release_extent_buffer(), to ensure the eb
* won't disappear out from under us.
*/
spin_lock(&eb->refs_lock);
+ rcu_read_unlock();
+
if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
spin_unlock(&eb->refs_lock);
+ rcu_read_lock();
continue;
}
@@ -4358,11 +4361,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* check the folio private at the end. And
* release_extent_buffer() will release the refs_lock.
*/
- xa_unlock_irq(&fs_info->buffer_tree);
release_extent_buffer(eb);
- xa_lock_irq(&fs_info->buffer_tree);
+ rcu_read_lock();
}
- xa_unlock_irq(&fs_info->buffer_tree);
+ rcu_read_unlock();
/*
* Finally to check if we have cleared folio private, as if we have
@@ -4375,7 +4377,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
ret = 0;
spin_unlock(&folio->mapping->i_private_lock);
return ret;
-
}
int try_release_extent_buffer(struct folio *folio)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b77dd22b8cdb..d740910e071a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -401,10 +401,12 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
while (index <= end_index) {
folio = filemap_get_folio(inode->vfs_inode.i_mapping, index);
- index++;
- if (IS_ERR(folio))
+ if (IS_ERR(folio)) {
+ index++;
continue;
+ }
+ index = folio_end(folio) >> PAGE_SHIFT;
/*
* Here we just clear all Ordered bits for every page in the
* range, then btrfs_mark_ordered_io_finished() will handle
@@ -2013,7 +2015,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
* cleaered by the caller.
*/
if (ret < 0)
- btrfs_cleanup_ordered_extents(inode, file_pos, end);
+ btrfs_cleanup_ordered_extents(inode, file_pos, len);
return ret;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1a5972178b3a..ccaa9a3cf1ce 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1453,7 +1453,6 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root,
struct btrfs_qgroup *src, int sign)
{
struct btrfs_qgroup *qgroup;
- struct btrfs_qgroup *cur;
LIST_HEAD(qgroup_list);
u64 num_bytes = src->excl;
int ret = 0;
@@ -1463,7 +1462,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root,
goto out;
qgroup_iterator_add(&qgroup_list, qgroup);
- list_for_each_entry(cur, &qgroup_list, iterator) {
+ list_for_each_entry(qgroup, &qgroup_list, iterator) {
struct btrfs_qgroup_list *glist;
qgroup->rfer += sign * num_bytes;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index e58151933844..7256f6748c8f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -602,6 +602,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
if (btrfs_root_id(root) == objectid) {
u64 commit_root_gen;
+ /*
+ * Relocation will wait for cleaner thread, and any half-dropped
+ * subvolume will be fully cleaned up at mount time.
+ * So here we shouldn't hit a subvolume with non-zero drop_progress.
+ *
+ * If this isn't the case, error out since it can make us attempt to
+ * drop references for extents that were already dropped before.
+ */
+ if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) {
+ struct btrfs_key cpu_key;
+
+ btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress);
+ btrfs_err(fs_info,
+ "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)",
+ objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset);
+ ret = -EUCLEAN;
+ goto fail;
+ }
+
/* called by btrfs_init_reloc_root */
ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
BTRFS_TREE_RELOC_OBJECTID);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2186e87fb61b..69e11557fd13 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2605,14 +2605,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
/*
* Correctly adjust the reserved bytes occupied by a log tree extent buffer
*/
-static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
+static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
{
struct btrfs_block_group *cache;
cache = btrfs_lookup_block_group(fs_info, start);
if (!cache) {
btrfs_err(fs_info, "unable to find block group for %llu", start);
- return;
+ return -ENOENT;
}
spin_lock(&cache->space_info->lock);
@@ -2623,27 +2623,22 @@ static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
spin_unlock(&cache->space_info->lock);
btrfs_put_block_group(cache);
+
+ return 0;
}
static int clean_log_buffer(struct btrfs_trans_handle *trans,
struct extent_buffer *eb)
{
- int ret;
-
btrfs_tree_lock(eb);
btrfs_clear_buffer_dirty(trans, eb);
wait_on_extent_buffer_writeback(eb);
btrfs_tree_unlock(eb);
- if (trans) {
- ret = btrfs_pin_reserved_extent(trans, eb);
- if (ret)
- return ret;
- } else {
- unaccount_log_buffer(eb->fs_info, eb->start);
- }
+ if (trans)
+ return btrfs_pin_reserved_extent(trans, eb);
- return 0;
+ return unaccount_log_buffer(eb->fs_info, eb->start);
}
static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 245e813ecd78..db11b5b5f0e6 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2650,7 +2650,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
spin_lock(&block_group->lock);
if (block_group->reserved || block_group->alloc_offset == 0 ||
- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
+ !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) ||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
continue;
diff --git a/fs/dax.c b/fs/dax.c
index 4229513806be..20ecf652c129 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1743,6 +1743,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
loff_t done = 0;
int ret;
+ if (WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC))
+ return -EIO;
+
if (!iomi.len)
return 0;
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 7b26efc271ee..d81f3318417d 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -3,8 +3,18 @@
config EROFS_FS
tristate "EROFS filesystem support"
depends on BLOCK
+ select CACHEFILES if EROFS_FS_ONDEMAND
select CRC32
+ select CRYPTO if EROFS_FS_ZIP_ACCEL
+ select CRYPTO_DEFLATE if EROFS_FS_ZIP_ACCEL
select FS_IOMAP
+ select LZ4_DECOMPRESS if EROFS_FS_ZIP
+ select NETFS_SUPPORT if EROFS_FS_ONDEMAND
+ select XXHASH if EROFS_FS_XATTR
+ select XZ_DEC if EROFS_FS_ZIP_LZMA
+ select XZ_DEC_MICROLZMA if EROFS_FS_ZIP_LZMA
+ select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE
+ select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD
help
EROFS (Enhanced Read-Only File System) is a lightweight read-only
file system with modern designs (e.g. no buffer heads, inline
@@ -38,7 +48,6 @@ config EROFS_FS_DEBUG
config EROFS_FS_XATTR
bool "EROFS extended attributes"
depends on EROFS_FS
- select XXHASH
default y
help
Extended attributes are name:value pairs associated with inodes by
@@ -94,7 +103,6 @@ config EROFS_FS_BACKED_BY_FILE
config EROFS_FS_ZIP
bool "EROFS Data Compression Support"
depends on EROFS_FS
- select LZ4_DECOMPRESS
default y
help
Enable transparent compression support for EROFS file systems.
@@ -104,8 +112,6 @@ config EROFS_FS_ZIP
config EROFS_FS_ZIP_LZMA
bool "EROFS LZMA compressed data support"
depends on EROFS_FS_ZIP
- select XZ_DEC
- select XZ_DEC_MICROLZMA
help
Saying Y here includes support for reading EROFS file systems
containing LZMA compressed data, specifically called microLZMA. It
@@ -117,7 +123,6 @@ config EROFS_FS_ZIP_LZMA
config EROFS_FS_ZIP_DEFLATE
bool "EROFS DEFLATE compressed data support"
depends on EROFS_FS_ZIP
- select ZLIB_INFLATE
help
Saying Y here includes support for reading EROFS file systems
containing DEFLATE compressed data. It gives better compression
@@ -132,7 +137,6 @@ config EROFS_FS_ZIP_DEFLATE
config EROFS_FS_ZIP_ZSTD
bool "EROFS Zstandard compressed data support"
depends on EROFS_FS_ZIP
- select ZSTD_DECOMPRESS
help
Saying Y here includes support for reading EROFS file systems
containing Zstandard compressed data. It gives better compression
@@ -147,8 +151,6 @@ config EROFS_FS_ZIP_ZSTD
config EROFS_FS_ZIP_ACCEL
bool "EROFS hardware decompression support"
depends on EROFS_FS_ZIP
- select CRYPTO
- select CRYPTO_DEFLATE
help
Saying Y here includes hardware accelerator support for reading
EROFS file systems containing compressed data. It gives better
@@ -163,9 +165,7 @@ config EROFS_FS_ZIP_ACCEL
config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support (deprecated)"
depends on EROFS_FS
- select NETFS_SUPPORT
select FSCACHE
- select CACHEFILES
select CACHEFILES_ONDEMAND
help
This permits EROFS to use fscache-backed data blobs with on-demand
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index e1020aa60771..1b529ace4db0 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -174,6 +174,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
if (!erofs_is_fileio_mode(sbi)) {
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
&dif->dax_part_off, NULL, NULL);
+ if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) {
+ erofs_info(sb, "DAX unsupported by %s. Turning off DAX.",
+ dif->path);
+ clear_opt(&sbi->opt, DAX_ALWAYS);
+ }
} else if (!S_ISREG(file_inode(file)->i_mode)) {
fput(file);
return -EINVAL;
@@ -210,8 +215,13 @@ static int erofs_scan_devices(struct super_block *sb,
ondisk_extradevs, sbi->devs->extra_devices);
return -EINVAL;
}
- if (!ondisk_extradevs)
+ if (!ondisk_extradevs) {
+ if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) {
+ erofs_info(sb, "DAX unsupported by block device. Turning off DAX.");
+ clear_opt(&sbi->opt, DAX_ALWAYS);
+ }
return 0;
+ }
if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb))
sbi->devs->flatdev = true;
@@ -313,8 +323,8 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) {
sbi->root_nid = le64_to_cpu(dsb->rootnid_8b);
- sbi->dif0.blocks = (sbi->dif0.blocks << 32) |
- le16_to_cpu(dsb->rb.blocks_hi);
+ sbi->dif0.blocks = sbi->dif0.blocks |
+ ((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32);
} else {
sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b);
}
@@ -338,7 +348,6 @@ static int erofs_read_superblock(struct super_block *sb)
if (ret < 0)
goto out;
- /* handle multiple devices */
ret = erofs_scan_devices(sb, dsb);
if (erofs_sb_has_48bit(sbi))
@@ -671,14 +680,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return invalfc(fc, "cannot use fsoffset in fscache mode");
}
- if (test_opt(&sbi->opt, DAX_ALWAYS)) {
- if (!sbi->dif0.dax_dev) {
- errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
- clear_opt(&sbi->opt, DAX_ALWAYS);
- } else if (sbi->blkszbits != PAGE_SHIFT) {
- errorfc(fc, "unsupported blocksize for DAX");
- clear_opt(&sbi->opt, DAX_ALWAYS);
- }
+ if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) {
+ erofs_info(sb, "unsupported blocksize for DAX");
+ clear_opt(&sbi->opt, DAX_ALWAYS);
}
sb->s_time_gran = 1;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 792f20888a8f..2d73297003d2 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1432,6 +1432,16 @@ static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work)
}
#endif
+/* Use (kthread_)work in atomic contexts to minimize scheduling overhead */
+static inline bool z_erofs_in_atomic(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPTION) && rcu_preempt_depth())
+ return true;
+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ return true;
+ return !preemptible();
+}
+
static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
int bios)
{
@@ -1446,8 +1456,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (atomic_add_return(bios, &io->pending_bios))
return;
- /* Use (kthread_)work and sync decompression for atomic contexts only */
- if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) {
+ if (z_erofs_in_atomic()) {
#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
struct kthread_worker *worker;
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 4f6468eb2adf..cb237f1b902a 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -103,10 +103,11 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (nfsd_file_get(new) == NULL)
goto again;
/*
- * Drop the ref we were going to install and the
- * one we were going to return.
+ * Drop the ref we were going to install (both file and
+ * net) and the one we were going to return (only file).
*/
nfsd_file_put(localio);
+ nfsd_net_put(net);
nfsd_file_put(localio);
localio = new;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 98ab55ba3ced..edf050766e57 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -470,7 +470,15 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
if (!iap->ia_valid)
return 0;
- iap->ia_valid |= ATTR_CTIME;
+ /*
+ * If ATTR_DELEG is set, then this is an update from a client that
+ * holds a delegation. If this is an update for only the atime, the
+ * ctime should not be changed. If the update contains the mtime
+ * too, then ATTR_CTIME should already be set.
+ */
+ if (!(iap->ia_valid & ATTR_DELEG))
+ iap->ia_valid |= ATTR_CTIME;
+
return notify_change(&nop_mnt_idmap, dentry, iap, NULL);
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3d6d8a9f13fc..29cca0e6d0ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -340,8 +340,8 @@ static int proc_maps_open(struct inode *inode, struct file *file,
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR_OR_NULL(priv->mm)) {
- int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
+ if (IS_ERR(priv->mm)) {
+ int err = PTR_ERR(priv->mm);
seq_release_private(inode, file);
return err;
@@ -1148,10 +1148,13 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
- pte_t ptent = huge_ptep_get(walk->mm, addr, pte);
struct folio *folio = NULL;
bool present = false;
+ spinlock_t *ptl;
+ pte_t ptent;
+ ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
+ ptent = huge_ptep_get(walk->mm, addr, pte);
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
present = true;
@@ -1170,6 +1173,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
}
+ spin_unlock(ptl);
return 0;
}
#else
@@ -2017,12 +2021,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
struct pagemapread *pm = walk->private;
struct vm_area_struct *vma = walk->vma;
u64 flags = 0, frame = 0;
+ spinlock_t *ptl;
int err = 0;
pte_t pte;
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
+ ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep);
pte = huge_ptep_get(walk->mm, addr, ptep);
if (pte_present(pte)) {
struct folio *folio = page_folio(pte_page(pte));
@@ -2050,11 +2056,12 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
err = add_to_pagemap(&pme, pm);
if (err)
- return err;
+ break;
if (pm->show_pfn && (flags & PM_PRESENT))
frame++;
}
+ spin_unlock(ptl);
cond_resched();
return err;
@@ -3128,17 +3135,22 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
- pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte);
+ pte_t huge_pte;
struct numa_maps *md;
struct page *page;
+ spinlock_t *ptl;
+ ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+ huge_pte = huge_ptep_get(walk->mm, addr, pte);
if (!pte_present(huge_pte))
- return 0;
+ goto out;
page = pte_page(huge_pte);
md = walk->private;
gather_stats(page, md, pte_dirty(huge_pte), 1);
+out:
+ spin_unlock(ptl);
return 0;
}
diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c
index bc1c1e9b288a..43b86fa4d695 100644
--- a/fs/smb/client/cifs_spnego.c
+++ b/fs/smb/client/cifs_spnego.c
@@ -124,55 +124,44 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo,
dp = description;
/* start with version and hostname portion of UNC string */
spnego_key = ERR_PTR(-EINVAL);
- sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION,
- hostname);
- dp = description + strlen(description);
+ dp += sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION,
+ hostname);
/* add the server address */
if (server->dstaddr.ss_family == AF_INET)
- sprintf(dp, "ip4=%pI4", &sa->sin_addr);
+ dp += sprintf(dp, "ip4=%pI4", &sa->sin_addr);
else if (server->dstaddr.ss_family == AF_INET6)
- sprintf(dp, "ip6=%pI6", &sa6->sin6_addr);
+ dp += sprintf(dp, "ip6=%pI6", &sa6->sin6_addr);
else
goto out;
- dp = description + strlen(description);
-
/* for now, only sec=krb5 and sec=mskrb5 and iakerb are valid */
if (server->sec_kerberos)
- sprintf(dp, ";sec=krb5");
+ dp += sprintf(dp, ";sec=krb5");
else if (server->sec_mskerberos)
- sprintf(dp, ";sec=mskrb5");
+ dp += sprintf(dp, ";sec=mskrb5");
else if (server->sec_iakerb)
- sprintf(dp, ";sec=iakerb");
+ dp += sprintf(dp, ";sec=iakerb");
else {
cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
- sprintf(dp, ";sec=krb5");
+ dp += sprintf(dp, ";sec=krb5");
}
- dp = description + strlen(description);
- sprintf(dp, ";uid=0x%x",
- from_kuid_munged(&init_user_ns, sesInfo->linux_uid));
+ dp += sprintf(dp, ";uid=0x%x",
+ from_kuid_munged(&init_user_ns, sesInfo->linux_uid));
- dp = description + strlen(description);
- sprintf(dp, ";creduid=0x%x",
+ dp += sprintf(dp, ";creduid=0x%x",
from_kuid_munged(&init_user_ns, sesInfo->cred_uid));
- if (sesInfo->user_name) {
- dp = description + strlen(description);
- sprintf(dp, ";user=%s", sesInfo->user_name);
- }
+ if (sesInfo->user_name)
+ dp += sprintf(dp, ";user=%s", sesInfo->user_name);
- dp = description + strlen(description);
- sprintf(dp, ";pid=0x%x", current->pid);
+ dp += sprintf(dp, ";pid=0x%x", current->pid);
- if (sesInfo->upcall_target == UPTARGET_MOUNT) {
- dp = description + strlen(description);
- sprintf(dp, ";upcall_target=mount");
- } else {
- dp = description + strlen(description);
- sprintf(dp, ";upcall_target=app");
- }
+ if (sesInfo->upcall_target == UPTARGET_MOUNT)
+ dp += sprintf(dp, ";upcall_target=mount");
+ else
+ dp += sprintf(dp, ";upcall_target=app");
cifs_dbg(FYI, "key description = %s\n", description);
saved_cred = override_creds(spnego_cred);
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index 487f39cff77e..3ce7c614ccc0 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 55
-#define CIFS_VERSION "2.55"
+#define SMB3_PRODUCT_BUILD 56
+#define CIFS_VERSION "2.56"
#endif /* _CIFSFS_H */
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index e6830ab3a546..1e64a4fb6af0 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1732,6 +1732,7 @@ struct mid_q_entry {
int mid_rc; /* rc for MID_RC */
__le16 command; /* smb command code */
unsigned int optype; /* operation type */
+ spinlock_t mid_lock;
bool wait_cancelled:1; /* Cancelled while waiting for response */
bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */
bool large_buf:1; /* if valid response, is pointer to large buf */
@@ -2036,6 +2037,9 @@ require use of the stronger protocol */
* cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo
* ->invalidHandle initiate_cifs_search
* ->oplock_break_cancelled
+ * mid_q_entry->mid_lock mid_q_entry->callback alloc_mid
+ * smb2_mid_entry_alloc
+ * (Any fields of mid_q_entry that will need protection)
****************************************************************************/
#ifdef DECLARE_GLOBALS_HERE
@@ -2375,6 +2379,23 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen)
return ret;
}
+/*
+ * Execute mid callback atomically - ensures callback runs exactly once
+ * and prevents sleeping in atomic context.
+ */
+static inline void mid_execute_callback(struct mid_q_entry *mid)
+{
+ void (*callback)(struct mid_q_entry *mid);
+
+ spin_lock(&mid->mid_lock);
+ callback = mid->callback;
+ mid->callback = NULL; /* Mark as executed, */
+ spin_unlock(&mid->mid_lock);
+
+ if (callback)
+ callback(mid);
+}
+
#define CIFS_REPARSE_SUPPORT(tcon) \
((tcon)->posix_extensions || \
(le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \
diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c
index 352dafb888dd..e98b95eff8c9 100644
--- a/fs/smb/client/cifstransport.c
+++ b/fs/smb/client/cifstransport.c
@@ -46,6 +46,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
memset(temp, 0, sizeof(struct mid_q_entry));
kref_init(&temp->refcount);
+ spin_lock_init(&temp->mid_lock);
temp->mid = get_mid(smb_buffer);
temp->pid = current->pid;
temp->command = cpu_to_le16(smb_buffer->Command);
@@ -345,16 +346,15 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
rc = wait_for_response(server, midQ);
if (rc != 0) {
send_cancel(server, &rqst, midQ);
- spin_lock(&server->mid_queue_lock);
- if (midQ->mid_state == MID_REQUEST_SUBMITTED ||
- midQ->mid_state == MID_RESPONSE_RECEIVED) {
+ spin_lock(&midQ->mid_lock);
+ if (midQ->callback) {
/* no longer considered to be "in-flight" */
midQ->callback = release_mid;
- spin_unlock(&server->mid_queue_lock);
+ spin_unlock(&midQ->mid_lock);
add_credits(server, &credits, 0);
return rc;
}
- spin_unlock(&server->mid_queue_lock);
+ spin_unlock(&midQ->mid_lock);
}
rc = cifs_sync_mid_result(midQ, server);
@@ -527,15 +527,14 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
rc = wait_for_response(server, midQ);
if (rc) {
send_cancel(server, &rqst, midQ);
- spin_lock(&server->mid_queue_lock);
- if (midQ->mid_state == MID_REQUEST_SUBMITTED ||
- midQ->mid_state == MID_RESPONSE_RECEIVED) {
+ spin_lock(&midQ->mid_lock);
+ if (midQ->callback) {
/* no longer considered to be "in-flight" */
midQ->callback = release_mid;
- spin_unlock(&server->mid_queue_lock);
+ spin_unlock(&midQ->mid_lock);
return rc;
}
- spin_unlock(&server->mid_queue_lock);
+ spin_unlock(&midQ->mid_lock);
}
/* We got the response - restart system call. */
diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c
index 766b4de13da7..db709f5cd2e1 100644
--- a/fs/smb/client/compress.c
+++ b/fs/smb/client/compress.c
@@ -155,58 +155,29 @@ static int cmp_bkt(const void *_a, const void *_b)
}
/*
- * TODO:
- * Support other iter types, if required.
- * Only ITER_XARRAY is supported for now.
+ * Collect some 2K samples with 2K gaps between.
*/
-static int collect_sample(const struct iov_iter *iter, ssize_t max, u8 *sample)
+static int collect_sample(const struct iov_iter *source, ssize_t max, u8 *sample)
{
- struct folio *folios[16], *folio;
- unsigned int nr, i, j, npages;
- loff_t start = iter->xarray_start + iter->iov_offset;
- pgoff_t last, index = start / PAGE_SIZE;
- size_t len, off, foff;
- void *p;
- int s = 0;
-
- last = (start + max - 1) / PAGE_SIZE;
- do {
- nr = xa_extract(iter->xarray, (void **)folios, index, last, ARRAY_SIZE(folios),
- XA_PRESENT);
- if (nr == 0)
- return -EIO;
-
- for (i = 0; i < nr; i++) {
- folio = folios[i];
- npages = folio_nr_pages(folio);
- foff = start - folio_pos(folio);
- off = foff % PAGE_SIZE;
-
- for (j = foff / PAGE_SIZE; j < npages; j++) {
- size_t len2;
-
- len = min_t(size_t, max, PAGE_SIZE - off);
- len2 = min_t(size_t, len, SZ_2K);
-
- p = kmap_local_page(folio_page(folio, j));
- memcpy(&sample[s], p, len2);
- kunmap_local(p);
-
- s += len2;
-
- if (len2 < SZ_2K || s >= max - SZ_2K)
- return s;
-
- max -= len;
- if (max <= 0)
- return s;
-
- start += len;
- off = 0;
- index++;
- }
- }
- } while (nr == ARRAY_SIZE(folios));
+ struct iov_iter iter = *source;
+ size_t s = 0;
+
+ while (iov_iter_count(&iter) >= SZ_2K) {
+ size_t part = umin(umin(iov_iter_count(&iter), SZ_2K), max);
+ size_t n;
+
+ n = copy_from_iter(sample + s, part, &iter);
+ if (n != part)
+ return -EFAULT;
+
+ s += n;
+ max -= n;
+
+ if (iov_iter_count(&iter) < PAGE_SIZE - SZ_2K)
+ break;
+
+ iov_iter_advance(&iter, SZ_2K);
+ }
return s;
}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 587845a2452d..dd12f3eb61dc 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -335,7 +335,7 @@ cifs_abort_connection(struct TCP_Server_Info *server)
cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
list_for_each_entry_safe(mid, nmid, &retry_list, qhead) {
list_del_init(&mid->qhead);
- mid->callback(mid);
+ mid_execute_callback(mid);
release_mid(mid);
}
@@ -919,7 +919,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type)
list_del_init(&mid->qhead);
mid->mid_rc = mid_rc;
mid->mid_state = MID_RC;
- mid->callback(mid);
+ mid_execute_callback(mid);
release_mid(mid);
}
@@ -1117,7 +1117,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid);
list_del_init(&mid_entry->qhead);
- mid_entry->callback(mid_entry);
+ mid_execute_callback(mid_entry);
release_mid(mid_entry);
}
/* 1/8th of sec is more than enough time for them to exit */
@@ -1394,7 +1394,7 @@ next_pdu:
}
if (!mids[i]->multiRsp || mids[i]->multiEnd)
- mids[i]->callback(mids[i]);
+ mid_execute_callback(mids[i]);
release_mid(mids[i]);
} else if (server->ops->is_oplock_break &&
@@ -4205,7 +4205,6 @@ retry:
return 0;
}
- server->lstrp = jiffies;
server->tcpStatus = CifsInNegotiate;
server->neg_start = jiffies;
spin_unlock(&server->srv_lock);
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 75be4b46bc6f..fe453a4b3dc8 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1943,15 +1943,24 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct tcon_link *tlink;
struct cifs_tcon *tcon;
+ __u32 dosattr = 0, origattr = 0;
struct TCP_Server_Info *server;
struct iattr *attrs = NULL;
- __u32 dosattr = 0, origattr = 0;
+ bool rehash = false;
cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry);
if (unlikely(cifs_forced_shutdown(cifs_sb)))
return -EIO;
+ /* Unhash dentry in advance to prevent any concurrent opens */
+ spin_lock(&dentry->d_lock);
+ if (!d_unhashed(dentry)) {
+ __d_drop(dentry);
+ rehash = true;
+ }
+ spin_unlock(&dentry->d_lock);
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
@@ -2003,7 +2012,8 @@ psx_del_no_retry:
cifs_drop_nlink(inode);
}
} else if (rc == -ENOENT) {
- d_drop(dentry);
+ if (simple_positive(dentry))
+ d_delete(dentry);
} else if (rc == -EBUSY) {
if (server->ops->rename_pending_delete) {
rc = server->ops->rename_pending_delete(full_path,
@@ -2056,6 +2066,8 @@ unlink_out:
kfree(attrs);
free_xid(xid);
cifs_put_tlink(tlink);
+ if (rehash)
+ d_rehash(dentry);
return rc;
}
@@ -2462,6 +2474,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
struct cifs_tcon *tcon;
+ bool rehash = false;
unsigned int xid;
int rc, tmprc;
int retry_count = 0;
@@ -2477,6 +2490,17 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
if (unlikely(cifs_forced_shutdown(cifs_sb)))
return -EIO;
+ /*
+ * Prevent any concurrent opens on the target by unhashing the dentry.
+ * VFS already unhashes the target when renaming directories.
+ */
+ if (d_is_positive(target_dentry) && !d_is_dir(target_dentry)) {
+ if (!d_unhashed(target_dentry)) {
+ d_drop(target_dentry);
+ rehash = true;
+ }
+ }
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
@@ -2518,6 +2542,8 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
}
}
+ if (!rc)
+ rehash = false;
/*
* No-replace is the natural behavior for CIFS, so skip unlink hacks.
*/
@@ -2576,12 +2602,16 @@ unlink_target:
goto cifs_rename_exit;
rc = cifs_do_rename(xid, source_dentry, from_name,
target_dentry, to_name);
+ if (!rc)
+ rehash = false;
}
/* force revalidate to go get info when needed */
CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
cifs_rename_exit:
+ if (rehash)
+ d_rehash(target_dentry);
kfree(info_buf_source);
free_dentry_path(page2);
free_dentry_path(page1);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index ad8947434b71..3b251de874ec 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -772,6 +772,13 @@ next_iface:
bytes_left -= sizeof(*p);
break;
}
+ /* Validate that Next doesn't point beyond the buffer */
+ if (next > bytes_left) {
+ cifs_dbg(VFS, "%s: invalid Next pointer %zu > %zd\n",
+ __func__, next, bytes_left);
+ rc = -EINVAL;
+ goto out;
+ }
p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
bytes_left -= next;
}
@@ -783,7 +790,9 @@ next_iface:
}
/* Azure rounds the buffer size up 8, to a 16 byte boundary */
- if ((bytes_left > 8) || p->Next)
+ if ((bytes_left > 8) ||
+ (bytes_left >= offsetof(struct network_interface_info_ioctl_rsp, Next)
+ + sizeof(p->Next) && p->Next))
cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
ses->iface_last_update = jiffies;
@@ -4805,7 +4814,7 @@ static void smb2_decrypt_offload(struct work_struct *work)
dw->server->ops->is_network_name_deleted(dw->buf,
dw->server);
- mid->callback(mid);
+ mid_execute_callback(mid);
} else {
spin_lock(&dw->server->srv_lock);
if (dw->server->tcpStatus == CifsNeedReconnect) {
@@ -4813,7 +4822,7 @@ static void smb2_decrypt_offload(struct work_struct *work)
mid->mid_state = MID_RETRY_NEEDED;
spin_unlock(&dw->server->mid_queue_lock);
spin_unlock(&dw->server->srv_lock);
- mid->callback(mid);
+ mid_execute_callback(mid);
} else {
spin_lock(&dw->server->mid_queue_lock);
mid->mid_state = MID_REQUEST_SUBMITTED;
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index ff9ef7fcd010..bc0e92eb2b64 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -771,6 +771,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *shdr,
temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
memset(temp, 0, sizeof(struct mid_q_entry));
kref_init(&temp->refcount);
+ spin_lock_init(&temp->mid_lock);
temp->mid = le64_to_cpu(shdr->MessageId);
temp->credits = credits > 0 ? credits : 1;
temp->pid = current->pid;
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index c628e91c328b..02d6db431fd4 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -1337,10 +1337,6 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "cancelling idle timer\n");
cancel_delayed_work_sync(&info->idle_timer_work);
- log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
- wait_event(info->wait_send_pending,
- atomic_read(&info->send_pending) == 0);
-
/* It's not possible for upper layer to get to reassembly */
log_rdma_event(INFO, "drain the reassembly queue\n");
do {
@@ -1986,7 +1982,11 @@ int smbd_send(struct TCP_Server_Info *server,
*/
wait_event(info->wait_send_pending,
- atomic_read(&info->send_pending) == 0);
+ atomic_read(&info->send_pending) == 0 ||
+ sc->status != SMBDIRECT_SOCKET_CONNECTED);
+
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0)
+ rc = -EAGAIN;
return rc;
}
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 32d528b4dd83..a61ba7f3fb86 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1005,15 +1005,14 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(server, &rqst[i], midQ[i]);
- spin_lock(&server->mid_queue_lock);
+ spin_lock(&midQ[i]->mid_lock);
midQ[i]->wait_cancelled = true;
- if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED ||
- midQ[i]->mid_state == MID_RESPONSE_RECEIVED) {
+ if (midQ[i]->callback) {
midQ[i]->callback = cifs_cancelled_callback;
cancelled_mid[i] = true;
credits[i].value = 0;
}
- spin_unlock(&server->mid_queue_lock);
+ spin_unlock(&midQ[i]->mid_lock);
}
}
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 1e6e9c10cea2..a8187281eb96 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -479,7 +479,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class,
__field(xfs_exntst_t, state)
),
TP_fast_assign(
- __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev;
+ __entry->dev = cursor->sc->mp->m_super->s_dev;
__entry->dqtype = cursor->dqtype;
__entry->ino = cursor->quota_ip->i_ino;
__entry->cur_id = cursor->id;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 55a304cb3aef..f96fbf5c54c9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1101,9 +1101,6 @@ xfs_file_write_iter(
if (xfs_is_shutdown(ip->i_mount))
return -EIO;
- if (IS_DAX(inode))
- return xfs_file_dax_write(iocb, from);
-
if (iocb->ki_flags & IOCB_ATOMIC) {
if (ocount < xfs_get_atomic_write_min(ip))
return -EINVAL;
@@ -1116,6 +1113,9 @@ xfs_file_write_iter(
return ret;
}
+ if (IS_DAX(inode))
+ return xfs_file_dax_write(iocb, from);
+
if (iocb->ki_flags & IOCB_DIRECT) {
/*
* Allow a directio write to fall back to a buffered
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 07fbdcc4cbf5..bd6d33557194 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -358,9 +358,20 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
{
+ if (IS_DAX(VFS_IC(ip)))
+ return false;
+
return xfs_inode_buftarg(ip)->bt_awu_max > 0;
}
+static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip)
+{
+ if (IS_DAX(VFS_IC(ip)))
+ return false;
+
+ return xfs_can_sw_atomic_write(ip->i_mount);
+}
+
/*
* In-core inode flags.
*/
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index fe1f74a3b6a3..e1051a530a50 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -219,7 +219,7 @@ xfs_bulk_ireq_setup(
else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno)
return -EINVAL;
- breq->flags |= XFS_IBULK_SAME_AG;
+ breq->iwalk_flags |= XFS_IWALK_SAME_AG;
/* Asking for an inode past the end of the AG? We're done! */
if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 149b5460fbfd..603effabe1ee 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -616,7 +616,8 @@ xfs_get_atomic_write_min(
* write of exactly one single fsblock if the bdev will make that
* guarantee for us.
*/
- if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp))
+ if (xfs_inode_can_hw_atomic_write(ip) ||
+ xfs_inode_can_sw_atomic_write(ip))
return mp->m_sb.sb_blocksize;
return 0;
@@ -633,7 +634,7 @@ xfs_get_atomic_write_max(
* write of exactly one single fsblock if the bdev will make that
* guarantee for us.
*/
- if (!xfs_can_sw_atomic_write(mp)) {
+ if (!xfs_inode_can_sw_atomic_write(ip)) {
if (xfs_inode_can_hw_atomic_write(ip))
return mp->m_sb.sb_blocksize;
return 0;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c8c9b8d8309f..2aa37a4d2706 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -307,7 +307,6 @@ xfs_bulkstat(
.breq = breq,
};
struct xfs_trans *tp;
- unsigned int iwalk_flags = 0;
int error;
if (breq->idmap != &nop_mnt_idmap) {
@@ -328,10 +327,7 @@ xfs_bulkstat(
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- if (breq->flags & XFS_IBULK_SAME_AG)
- iwalk_flags |= XFS_IWALK_SAME_AG;
-
- error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags,
+ error = xfs_iwalk(breq->mp, tp, breq->startino, breq->iwalk_flags,
xfs_bulkstat_iwalk, breq->icount, &bc);
xfs_trans_cancel(tp);
kfree(bc.buf);
@@ -457,7 +453,7 @@ xfs_inumbers(
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->iwalk_flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index f10e8f8f2335..2d0612f14d6e 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -13,17 +13,15 @@ struct xfs_ibulk {
xfs_ino_t startino; /* start with this inode */
unsigned int icount; /* number of elements in ubuffer */
unsigned int ocount; /* number of records returned */
- unsigned int flags; /* see XFS_IBULK_FLAG_* */
+ unsigned int flags; /* XFS_IBULK_FLAG_* */
+ unsigned int iwalk_flags; /* XFS_IWALK_FLAG_* */
};
-/* Only iterate within the same AG as startino */
-#define XFS_IBULK_SAME_AG (1U << 0)
-
/* Fill out the bs_extents64 field if set. */
-#define XFS_IBULK_NREXT64 (1U << 1)
+#define XFS_IBULK_NREXT64 (1U << 0)
/* Signal that we can return metadata directories. */
-#define XFS_IBULK_METADIR (1U << 2)
+#define XFS_IBULK_METADIR (1U << 1)
/*
* Advance the user buffer pointer by one record of the given size. If the
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2133fbaf1766..dc32c5e34d81 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -779,6 +779,25 @@ xfs_set_max_atomic_write_opt(
return -EINVAL;
}
+ if (xfs_has_reflink(mp))
+ goto set_limit;
+
+ if (new_max_fsbs == 1) {
+ if (mp->m_ddev_targp->bt_awu_max ||
+ (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_awu_max)) {
+ } else {
+ xfs_warn(mp,
+ "cannot support atomic writes of size %lluk with no reflink or HW support",
+ new_max_bytes >> 10);
+ return -EINVAL;
+ }
+ } else {
+ xfs_warn(mp,
+ "cannot support atomic writes of size %lluk with no reflink support",
+ new_max_bytes >> 10);
+ return -EINVAL;
+ }
+
set_limit:
error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs);
if (error) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e1794e3e3156..ac344e42846c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -455,6 +455,7 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \
xfs_extlen_t len), \
TP_ARGS(oz, rgbno, len))
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks);
+DEFINE_ZONE_ALLOC_EVENT(xfs_zone_skip_blocks);
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks);
TRACE_EVENT(xfs_zone_gc_select_victim,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ece374d622b3..575e7028f423 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -253,8 +253,8 @@ xfs_trans_alloc(
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/
retry:
- WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
tp = __xfs_trans_alloc(mp, flags);
+ WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
error = xfs_trans_reserve(tp, resp, blocks, rtextents);
if (error == -ENOSPC && want_retry) {
xfs_trans_cancel(tp);
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 33f7eee521a8..f8bd6d741755 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -166,10 +166,9 @@ xfs_open_zone_mark_full(
static void
xfs_zone_record_blocks(
struct xfs_trans *tp,
- xfs_fsblock_t fsbno,
- xfs_filblks_t len,
struct xfs_open_zone *oz,
- bool used)
+ xfs_fsblock_t fsbno,
+ xfs_filblks_t len)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_rtgroup *rtg = oz->oz_rtg;
@@ -179,18 +178,37 @@ xfs_zone_record_blocks(
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
- if (used) {
- rmapip->i_used_blocks += len;
- ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
- } else {
- xfs_add_frextents(mp, len);
- }
+ rmapip->i_used_blocks += len;
+ ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
oz->oz_written += len;
if (oz->oz_written == rtg_blocks(rtg))
xfs_open_zone_mark_full(oz);
xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
}
+/*
+ * Called for blocks that have been written to disk, but not actually linked to
+ * an inode, which can happen when garbage collection races with user data
+ * writes to a file.
+ */
+static void
+xfs_zone_skip_blocks(
+ struct xfs_open_zone *oz,
+ xfs_filblks_t len)
+{
+ struct xfs_rtgroup *rtg = oz->oz_rtg;
+
+ trace_xfs_zone_skip_blocks(oz, 0, len);
+
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ oz->oz_written += len;
+ if (oz->oz_written == rtg_blocks(rtg))
+ xfs_open_zone_mark_full(oz);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+
+ xfs_add_frextents(rtg_mount(rtg), len);
+}
+
static int
xfs_zoned_map_extent(
struct xfs_trans *tp,
@@ -250,8 +268,7 @@ xfs_zoned_map_extent(
}
}
- xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz,
- true);
+ xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount);
/* Map the new blocks into the data fork. */
xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new);
@@ -259,8 +276,7 @@ xfs_zoned_map_extent(
skip:
trace_xfs_reflink_cow_remap_skip(ip, new);
- xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz,
- false);
+ xfs_zone_skip_blocks(oz, new->br_blockcount);
return 0;
}