Diffstat (limited to 'fs')
35 files changed, 312 insertions, 199 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 835b0deef9bb..f23d75986947 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4331,15 +4331,18 @@ static int try_release_subpage_extent_buffer(struct folio *folio) unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1; int ret; - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) { /* * The same as try_release_extent_buffer(), to ensure the eb * won't disappear out from under us. */ spin_lock(&eb->refs_lock); + rcu_read_unlock(); + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { spin_unlock(&eb->refs_lock); + rcu_read_lock(); continue; } @@ -4358,11 +4361,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ - xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); } - xa_unlock_irq(&fs_info->buffer_tree); + rcu_read_unlock(); /* * Finally to check if we have cleared folio private, as if we have @@ -4375,7 +4377,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) ret = 0; spin_unlock(&folio->mapping->i_private_lock); return ret; - } int try_release_extent_buffer(struct folio *folio) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b77dd22b8cdb..d740910e071a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -401,10 +401,12 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, while (index <= end_index) { folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); - index++; - if (IS_ERR(folio)) + if (IS_ERR(folio)) { + index++; continue; + } + index = folio_end(folio) >> PAGE_SHIFT; /* * Here we just clear all Ordered bits for every page in the * range, then btrfs_mark_ordered_io_finished() will handle @@ -2013,7 +2015,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio * cleaered by the caller. */ if (ret < 0) - btrfs_cleanup_ordered_extents(inode, file_pos, end); + btrfs_cleanup_ordered_extents(inode, file_pos, len); return ret; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1a5972178b3a..ccaa9a3cf1ce 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1453,7 +1453,6 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup *src, int sign) { struct btrfs_qgroup *qgroup; - struct btrfs_qgroup *cur; LIST_HEAD(qgroup_list); u64 num_bytes = src->excl; int ret = 0; @@ -1463,7 +1462,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, goto out; qgroup_iterator_add(&qgroup_list, qgroup); - list_for_each_entry(cur, &qgroup_list, iterator) { + list_for_each_entry(qgroup, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; qgroup->rfer += sign * num_bytes; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e58151933844..7256f6748c8f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -602,6 +602,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, if (btrfs_root_id(root) == objectid) { u64 commit_root_gen; + /* + * Relocation will wait for cleaner thread, and any half-dropped + * subvolume will be fully cleaned up at mount time. + * So here we shouldn't hit a subvolume with non-zero drop_progress. 
+ * + * If this isn't the case, error out since it can make us attempt to + * drop references for extents that were already dropped before. + */ + if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) { + struct btrfs_key cpu_key; + + btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); + btrfs_err(fs_info, + "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", + objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); + ret = -EUCLEAN; + goto fail; + } + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2186e87fb61b..69e11557fd13 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2605,14 +2605,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* * Correctly adjust the reserved bytes occupied by a log tree extent buffer */ -static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) +static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, start); if (!cache) { btrfs_err(fs_info, "unable to find block group for %llu", start); - return; + return -ENOENT; } spin_lock(&cache->space_info->lock); @@ -2623,27 +2623,22 @@ static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) spin_unlock(&cache->space_info->lock); btrfs_put_block_group(cache); + + return 0; } static int clean_log_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *eb) { - int ret; - btrfs_tree_lock(eb); btrfs_clear_buffer_dirty(trans, eb); wait_on_extent_buffer_writeback(eb); btrfs_tree_unlock(eb); - if (trans) { - ret = btrfs_pin_reserved_extent(trans, eb); - if (ret) - return ret; - } else { - unaccount_log_buffer(eb->fs_info, eb->start); - } + if (trans) + return btrfs_pin_reserved_extent(trans, eb); - return 0; + return unaccount_log_buffer(eb->fs_info, eb->start); } static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 245e813ecd78..db11b5b5f0e6 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2650,7 +2650,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) spin_lock(&block_group->lock); if (block_group->reserved || block_group->alloc_offset == 0 || - (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || + !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); continue; } diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c @@ -1743,6 +1743,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, loff_t done = 0; int ret; + if (WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC)) + return -EIO; + if (!iomi.len) return 0; diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 7b26efc271ee..d81f3318417d 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,18 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CACHEFILES if EROFS_FS_ONDEMAND select CRC32 + select CRYPTO if EROFS_FS_ZIP_ACCEL + select CRYPTO_DEFLATE if EROFS_FS_ZIP_ACCEL select FS_IOMAP + select LZ4_DECOMPRESS if EROFS_FS_ZIP + select NETFS_SUPPORT if EROFS_FS_ONDEMAND + select XXHASH if EROFS_FS_XATTR + select XZ_DEC if EROFS_FS_ZIP_LZMA + select XZ_DEC_MICROLZMA if EROFS_FS_ZIP_LZMA + select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE + select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help EROFS (Enhanced Read-Only File
System) is a lightweight read-only file system with modern designs (e.g. no buffer heads, inline @@ -38,7 +48,6 @@ config EROFS_FS_DEBUG config EROFS_FS_XATTR bool "EROFS extended attributes" depends on EROFS_FS - select XXHASH default y help Extended attributes are name:value pairs associated with inodes by @@ -94,7 +103,6 @@ config EROFS_FS_BACKED_BY_FILE config EROFS_FS_ZIP bool "EROFS Data Compression Support" depends on EROFS_FS - select LZ4_DECOMPRESS default y help Enable transparent compression support for EROFS file systems. @@ -104,8 +112,6 @@ config EROFS_FS_ZIP config EROFS_FS_ZIP_LZMA bool "EROFS LZMA compressed data support" depends on EROFS_FS_ZIP - select XZ_DEC - select XZ_DEC_MICROLZMA help Saying Y here includes support for reading EROFS file systems containing LZMA compressed data, specifically called microLZMA. It @@ -117,7 +123,6 @@ config EROFS_FS_ZIP_LZMA config EROFS_FS_ZIP_DEFLATE bool "EROFS DEFLATE compressed data support" depends on EROFS_FS_ZIP - select ZLIB_INFLATE help Saying Y here includes support for reading EROFS file systems containing DEFLATE compressed data. It gives better compression @@ -132,7 +137,6 @@ config EROFS_FS_ZIP_DEFLATE config EROFS_FS_ZIP_ZSTD bool "EROFS Zstandard compressed data support" depends on EROFS_FS_ZIP - select ZSTD_DECOMPRESS help Saying Y here includes support for reading EROFS file systems containing Zstandard compressed data. It gives better compression @@ -147,8 +151,6 @@ config EROFS_FS_ZIP_ZSTD config EROFS_FS_ZIP_ACCEL bool "EROFS hardware decompression support" depends on EROFS_FS_ZIP - select CRYPTO - select CRYPTO_DEFLATE help Saying Y here includes hardware accelerator support for reading EROFS file systems containing compressed data. It gives better @@ -163,9 +165,7 @@ config EROFS_FS_ZIP_ACCEL config EROFS_FS_ONDEMAND bool "EROFS fscache-based on-demand read support (deprecated)" depends on EROFS_FS - select NETFS_SUPPORT select FSCACHE - select CACHEFILES select CACHEFILES_ONDEMAND help This permits EROFS to use fscache-backed data blobs with on-demand diff --git a/fs/erofs/super.c b/fs/erofs/super.c index e1020aa60771..1b529ace4db0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -174,6 +174,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), &dif->dax_part_off, NULL, NULL); + if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) { + erofs_info(sb, "DAX unsupported by %s. Turning off DAX.", + dif->path); + clear_opt(&sbi->opt, DAX_ALWAYS); + } } else if (!S_ISREG(file_inode(file)->i_mode)) { fput(file); return -EINVAL; @@ -210,8 +215,13 @@ static int erofs_scan_devices(struct super_block *sb, ondisk_extradevs, sbi->devs->extra_devices); return -EINVAL; } - if (!ondisk_extradevs) + if (!ondisk_extradevs) { + if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) { + erofs_info(sb, "DAX unsupported by block device. 
Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } return 0; + } if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb)) sbi->devs->flatdev = true; @@ -313,8 +323,8 @@ static int erofs_read_superblock(struct super_block *sb) sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); - sbi->dif0.blocks = (sbi->dif0.blocks << 32) | - le16_to_cpu(dsb->rb.blocks_hi); + sbi->dif0.blocks = sbi->dif0.blocks | + ((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32); } else { sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); } @@ -338,7 +348,6 @@ static int erofs_read_superblock(struct super_block *sb) if (ret < 0) goto out; - /* handle multiple devices */ ret = erofs_scan_devices(sb, dsb); if (erofs_sb_has_48bit(sbi)) @@ -671,14 +680,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return invalfc(fc, "cannot use fsoffset in fscache mode"); } - if (test_opt(&sbi->opt, DAX_ALWAYS)) { - if (!sbi->dif0.dax_dev) { - errorfc(fc, "DAX unsupported by block device. Turning off DAX."); - clear_opt(&sbi->opt, DAX_ALWAYS); - } else if (sbi->blkszbits != PAGE_SHIFT) { - errorfc(fc, "unsupported blocksize for DAX"); - clear_opt(&sbi->opt, DAX_ALWAYS); - } + if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) { + erofs_info(sb, "unsupported blocksize for DAX"); + clear_opt(&sbi->opt, DAX_ALWAYS); } sb->s_time_gran = 1; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 792f20888a8f..2d73297003d2 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1432,6 +1432,16 @@ static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work) } #endif +/* Use (kthread_)work in atomic contexts to minimize scheduling overhead */ +static inline bool z_erofs_in_atomic(void) +{ + if (IS_ENABLED(CONFIG_PREEMPTION) && rcu_preempt_depth()) + return true; + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return true; + return !preemptible(); +} + static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { @@ -1446,8 +1456,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, if (atomic_add_return(bios, &io->pending_bios)) return; - /* Use (kthread_)work and sync decompression for atomic contexts only */ - if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) { + if (z_erofs_in_atomic()) { #ifdef CONFIG_EROFS_FS_PCPU_KTHREAD struct kthread_worker *worker; diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 4f6468eb2adf..cb237f1b902a 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -103,10 +103,11 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfsd_file_get(new) == NULL) goto again; /* - * Drop the ref we were going to install and the - * one we were going to return. + * Drop the ref we were going to install (both file and + * net) and the one we were going to return (only file). */ nfsd_file_put(localio); + nfsd_net_put(net); nfsd_file_put(localio); localio = new; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 98ab55ba3ced..edf050766e57 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -470,7 +470,15 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) if (!iap->ia_valid) return 0; - iap->ia_valid |= ATTR_CTIME; + /* + * If ATTR_DELEG is set, then this is an update from a client that + * holds a delegation. If this is an update for only the atime, the + * ctime should not be changed. 
If the update contains the mtime + * too, then ATTR_CTIME should already be set. + */ + if (!(iap->ia_valid & ATTR_DELEG)) + iap->ia_valid |= ATTR_CTIME; + return notify_change(&nop_mnt_idmap, dentry, iap, NULL); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3d6d8a9f13fc..29cca0e6d0ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -340,8 +340,8 @@ static int proc_maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR_OR_NULL(priv->mm)) { - int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); seq_release_private(inode, file); return err; @@ -1148,10 +1148,13 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; - pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; bool present = false; + spinlock_t *ptl; + pte_t ptent; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); + ptent = huge_ptep_get(walk->mm, addr, pte); if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; @@ -1170,6 +1173,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } + spin_unlock(ptl); return 0; } #else @@ -2017,12 +2021,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, struct pagemapread *pm = walk->private; struct vm_area_struct *vma = walk->vma; u64 flags = 0, frame = 0; + spinlock_t *ptl; int err = 0; pte_t pte; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep); pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); @@ -2050,11 +2056,12 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, err = add_to_pagemap(&pme, pm); if (err) - return err; + break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; } + spin_unlock(ptl); cond_resched(); return err; @@ -3128,17 +3135,22 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); + pte_t huge_pte; struct numa_maps *md; struct page *page; + spinlock_t *ptl; + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); + huge_pte = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(huge_pte)) - return 0; + goto out; page = pte_page(huge_pte); md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); +out: + spin_unlock(ptl); return 0; } diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index bc1c1e9b288a..43b86fa4d695 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -124,55 +124,44 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, dp = description; /* start with version and hostname portion of UNC string */ spnego_key = ERR_PTR(-EINVAL); - sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION, - hostname); - dp = description + strlen(description); + dp += sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION, + hostname); /* add the server address */ if (server->dstaddr.ss_family == AF_INET) - sprintf(dp, "ip4=%pI4", &sa->sin_addr); + dp += sprintf(dp, "ip4=%pI4", &sa->sin_addr); else if (server->dstaddr.ss_family == AF_INET6) - sprintf(dp, "ip6=%pI6", 
&sa6->sin6_addr); + dp += sprintf(dp, "ip6=%pI6", &sa6->sin6_addr); else goto out; - dp = description + strlen(description); - /* for now, only sec=krb5 and sec=mskrb5 and iakerb are valid */ if (server->sec_kerberos) - sprintf(dp, ";sec=krb5"); + dp += sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) - sprintf(dp, ";sec=mskrb5"); + dp += sprintf(dp, ";sec=mskrb5"); else if (server->sec_iakerb) - sprintf(dp, ";sec=iakerb"); + dp += sprintf(dp, ";sec=iakerb"); else { cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); - sprintf(dp, ";sec=krb5"); + dp += sprintf(dp, ";sec=krb5"); } - dp = description + strlen(description); - sprintf(dp, ";uid=0x%x", - from_kuid_munged(&init_user_ns, sesInfo->linux_uid)); + dp += sprintf(dp, ";uid=0x%x", + from_kuid_munged(&init_user_ns, sesInfo->linux_uid)); - dp = description + strlen(description); - sprintf(dp, ";creduid=0x%x", + dp += sprintf(dp, ";creduid=0x%x", from_kuid_munged(&init_user_ns, sesInfo->cred_uid)); - if (sesInfo->user_name) { - dp = description + strlen(description); - sprintf(dp, ";user=%s", sesInfo->user_name); - } + if (sesInfo->user_name) + dp += sprintf(dp, ";user=%s", sesInfo->user_name); - dp = description + strlen(description); - sprintf(dp, ";pid=0x%x", current->pid); + dp += sprintf(dp, ";pid=0x%x", current->pid); - if (sesInfo->upcall_target == UPTARGET_MOUNT) { - dp = description + strlen(description); - sprintf(dp, ";upcall_target=mount"); - } else { - dp = description + strlen(description); - sprintf(dp, ";upcall_target=app"); - } + if (sesInfo->upcall_target == UPTARGET_MOUNT) + dp += sprintf(dp, ";upcall_target=mount"); + else + dp += sprintf(dp, ";upcall_target=app"); cifs_dbg(FYI, "key description = %s\n", description); saved_cred = override_creds(spnego_cred); diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 487f39cff77e..3ce7c614ccc0 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 55 -#define CIFS_VERSION "2.55" +#define SMB3_PRODUCT_BUILD 56 +#define CIFS_VERSION "2.56" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index e6830ab3a546..1e64a4fb6af0 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1732,6 +1732,7 @@ struct mid_q_entry { int mid_rc; /* rc for MID_RC */ __le16 command; /* smb command code */ unsigned int optype; /* operation type */ + spinlock_t mid_lock; bool wait_cancelled:1; /* Cancelled while waiting for response */ bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */ bool large_buf:1; /* if valid response, is pointer to large buf */ @@ -2036,6 +2037,9 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled + * mid_q_entry->mid_lock mid_q_entry->callback alloc_mid + * smb2_mid_entry_alloc + * (Any fields of mid_q_entry that will need protection) ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE @@ -2375,6 +2379,23 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen) return ret; } +/* + * Execute mid callback atomically - ensures callback runs exactly once + * and prevents sleeping in atomic context. 
+ */ +static inline void mid_execute_callback(struct mid_q_entry *mid) +{ + void (*callback)(struct mid_q_entry *mid); + + spin_lock(&mid->mid_lock); + callback = mid->callback; + mid->callback = NULL; /* Mark as executed, */ + spin_unlock(&mid->mid_lock); + + if (callback) + callback(mid); +} + #define CIFS_REPARSE_SUPPORT(tcon) \ ((tcon)->posix_extensions || \ (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c index 352dafb888dd..e98b95eff8c9 100644 --- a/fs/smb/client/cifstransport.c +++ b/fs/smb/client/cifstransport.c @@ -46,6 +46,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); kref_init(&temp->refcount); + spin_lock_init(&temp->mid_lock); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -345,16 +346,15 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(server, midQ); if (rc != 0) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { + spin_lock(&midQ->mid_lock); + if (midQ->callback) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); add_credits(server, &credits, 0); return rc; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); } rc = cifs_sync_mid_result(midQ, server); @@ -527,15 +527,14 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = wait_for_response(server, midQ); if (rc) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { + spin_lock(&midQ->mid_lock); + if (midQ->callback) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); return rc; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); } /* We got the response - restart system call. */ diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c index 766b4de13da7..db709f5cd2e1 100644 --- a/fs/smb/client/compress.c +++ b/fs/smb/client/compress.c @@ -155,58 +155,29 @@ static int cmp_bkt(const void *_a, const void *_b) } /* - * TODO: - * Support other iter types, if required. - * Only ITER_XARRAY is supported for now. + * Collect some 2K samples with 2K gaps between. 
*/ -static int collect_sample(const struct iov_iter *iter, ssize_t max, u8 *sample) +static int collect_sample(const struct iov_iter *source, ssize_t max, u8 *sample) { - struct folio *folios[16], *folio; - unsigned int nr, i, j, npages; - loff_t start = iter->xarray_start + iter->iov_offset; - pgoff_t last, index = start / PAGE_SIZE; - size_t len, off, foff; - void *p; - int s = 0; - - last = (start + max - 1) / PAGE_SIZE; - do { - nr = xa_extract(iter->xarray, (void **)folios, index, last, ARRAY_SIZE(folios), - XA_PRESENT); - if (nr == 0) - return -EIO; - - for (i = 0; i < nr; i++) { - folio = folios[i]; - npages = folio_nr_pages(folio); - foff = start - folio_pos(folio); - off = foff % PAGE_SIZE; - - for (j = foff / PAGE_SIZE; j < npages; j++) { - size_t len2; - - len = min_t(size_t, max, PAGE_SIZE - off); - len2 = min_t(size_t, len, SZ_2K); - - p = kmap_local_page(folio_page(folio, j)); - memcpy(&sample[s], p, len2); - kunmap_local(p); - - s += len2; - - if (len2 < SZ_2K || s >= max - SZ_2K) - return s; - - max -= len; - if (max <= 0) - return s; - - start += len; - off = 0; - index++; - } - } - } while (nr == ARRAY_SIZE(folios)); + struct iov_iter iter = *source; + size_t s = 0; + + while (iov_iter_count(&iter) >= SZ_2K) { + size_t part = umin(umin(iov_iter_count(&iter), SZ_2K), max); + size_t n; + + n = copy_from_iter(sample + s, part, &iter); + if (n != part) + return -EFAULT; + + s += n; + max -= n; + + if (iov_iter_count(&iter) < PAGE_SIZE - SZ_2K) + break; + + iov_iter_advance(&iter, SZ_2K); + } return s; } diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 587845a2452d..dd12f3eb61dc 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -335,7 +335,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_entry_safe(mid, nmid, &retry_list, qhead) { list_del_init(&mid->qhead); - mid->callback(mid); + mid_execute_callback(mid); release_mid(mid); } @@ -919,7 +919,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) list_del_init(&mid->qhead); mid->mid_rc = mid_rc; mid->mid_state = MID_RC; - mid->callback(mid); + mid_execute_callback(mid); release_mid(mid); } @@ -1117,7 +1117,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid); list_del_init(&mid_entry->qhead); - mid_entry->callback(mid_entry); + mid_execute_callback(mid_entry); release_mid(mid_entry); } /* 1/8th of sec is more than enough time for them to exit */ @@ -1394,7 +1394,7 @@ next_pdu: } if (!mids[i]->multiRsp || mids[i]->multiEnd) - mids[i]->callback(mids[i]); + mid_execute_callback(mids[i]); release_mid(mids[i]); } else if (server->ops->is_oplock_break && @@ -4205,7 +4205,6 @@ retry: return 0; } - server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; server->neg_start = jiffies; spin_unlock(&server->srv_lock); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 75be4b46bc6f..fe453a4b3dc8 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1943,15 +1943,24 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *tcon; + __u32 dosattr = 0, origattr = 0; struct TCP_Server_Info *server; struct iattr *attrs = NULL; - __u32 dosattr = 0, origattr = 0; + bool rehash = false; cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry); if 
(unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* Unhash dentry in advance to prevent any concurrent opens */ + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) { + __d_drop(dentry); + rehash = true; + } + spin_unlock(&dentry->d_lock); + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -2003,7 +2012,8 @@ psx_del_no_retry: cifs_drop_nlink(inode); } } else if (rc == -ENOENT) { - d_drop(dentry); + if (simple_positive(dentry)) + d_delete(dentry); } else if (rc == -EBUSY) { if (server->ops->rename_pending_delete) { rc = server->ops->rename_pending_delete(full_path, @@ -2056,6 +2066,8 @@ unlink_out: kfree(attrs); free_xid(xid); cifs_put_tlink(tlink); + if (rehash) + d_rehash(dentry); return rc; } @@ -2462,6 +2474,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; + bool rehash = false; unsigned int xid; int rc, tmprc; int retry_count = 0; @@ -2477,6 +2490,17 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, if (unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* + * Prevent any concurrent opens on the target by unhashing the dentry. + * VFS already unhashes the target when renaming directories. + */ + if (d_is_positive(target_dentry) && !d_is_dir(target_dentry)) { + if (!d_unhashed(target_dentry)) { + d_drop(target_dentry); + rehash = true; + } + } + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -2518,6 +2542,8 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, } } + if (!rc) + rehash = false; /* * No-replace is the natural behavior for CIFS, so skip unlink hacks. */ @@ -2576,12 +2602,16 @@ unlink_target: goto cifs_rename_exit; rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + if (!rc) + rehash = false; } /* force revalidate to go get info when needed */ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; cifs_rename_exit: + if (rehash) + d_rehash(target_dentry); kfree(info_buf_source); free_dentry_path(page2); free_dentry_path(page1); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ad8947434b71..3b251de874ec 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -772,6 +772,13 @@ next_iface: bytes_left -= sizeof(*p); break; } + /* Validate that Next doesn't point beyond the buffer */ + if (next > bytes_left) { + cifs_dbg(VFS, "%s: invalid Next pointer %zu > %zd\n", + __func__, next, bytes_left); + rc = -EINVAL; + goto out; + } p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); bytes_left -= next; } @@ -783,7 +790,9 @@ next_iface: } /* Azure rounds the buffer size up 8, to a 16 byte boundary */ - if ((bytes_left > 8) || p->Next) + if ((bytes_left > 8) || + (bytes_left >= offsetof(struct network_interface_info_ioctl_rsp, Next) + + sizeof(p->Next) && p->Next)) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); ses->iface_last_update = jiffies; @@ -4805,7 +4814,7 @@ static void smb2_decrypt_offload(struct work_struct *work) dw->server->ops->is_network_name_deleted(dw->buf, dw->server); - mid->callback(mid); + mid_execute_callback(mid); } else { spin_lock(&dw->server->srv_lock); if (dw->server->tcpStatus == CifsNeedReconnect) { @@ -4813,7 +4822,7 @@ static void smb2_decrypt_offload(struct work_struct *work) mid->mid_state = MID_RETRY_NEEDED; spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); - mid->callback(mid); + mid_execute_callback(mid); } else { 
spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index ff9ef7fcd010..bc0e92eb2b64 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -771,6 +771,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); kref_init(&temp->refcount); + spin_lock_init(&temp->mid_lock); temp->mid = le64_to_cpu(shdr->MessageId); temp->credits = credits > 0 ? credits : 1; temp->pid = current->pid; diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c628e91c328b..02d6db431fd4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1337,10 +1337,6 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); - log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); - wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0); - /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { @@ -1986,7 +1982,11 @@ int smbd_send(struct TCP_Server_Info *server, */ wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0); + atomic_read(&info->send_pending) == 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) + rc = -EAGAIN; return rc; } diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 32d528b4dd83..a61ba7f3fb86 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1005,15 +1005,14 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); - spin_lock(&server->mid_queue_lock); + spin_lock(&midQ[i]->mid_lock); midQ[i]->wait_cancelled = true; - if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || - midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { + if (midQ[i]->callback) { midQ[i]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; credits[i].value = 0; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ[i]->mid_lock); } } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 1e6e9c10cea2..a8187281eb96 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -479,7 +479,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class, __field(xfs_exntst_t, state) ), TP_fast_assign( - __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev; + __entry->dev = cursor->sc->mp->m_super->s_dev; __entry->dqtype = cursor->dqtype; __entry->ino = cursor->quota_ip->i_ino; __entry->cur_id = cursor->id; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 55a304cb3aef..f96fbf5c54c9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1101,9 +1101,6 @@ xfs_file_write_iter( if (xfs_is_shutdown(ip->i_mount)) return -EIO; - if (IS_DAX(inode)) - return xfs_file_dax_write(iocb, from); - if (iocb->ki_flags & IOCB_ATOMIC) { if (ocount < xfs_get_atomic_write_min(ip)) return -EINVAL; @@ -1116,6 +1113,9 @@ xfs_file_write_iter( return ret; } + if (IS_DAX(inode)) + return xfs_file_dax_write(iocb, from); + if (iocb->ki_flags & IOCB_DIRECT) { /* * Allow a directio write to fall back to a buffered diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 07fbdcc4cbf5..bd6d33557194 100644 --- a/fs/xfs/xfs_inode.h +++ 
b/fs/xfs/xfs_inode.h @@ -358,9 +358,20 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip) { + if (IS_DAX(VFS_IC(ip))) + return false; + return xfs_inode_buftarg(ip)->bt_awu_max > 0; } +static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip) +{ + if (IS_DAX(VFS_IC(ip))) + return false; + + return xfs_can_sw_atomic_write(ip->i_mount); +} + /* * In-core inode flags. */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index fe1f74a3b6a3..e1051a530a50 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -219,7 +219,7 @@ xfs_bulk_ireq_setup( else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) return -EINVAL; - breq->flags |= XFS_IBULK_SAME_AG; + breq->iwalk_flags |= XFS_IWALK_SAME_AG; /* Asking for an inode past the end of the AG? We're done! */ if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 149b5460fbfd..603effabe1ee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -616,7 +616,8 @@ xfs_get_atomic_write_min( * write of exactly one single fsblock if the bdev will make that * guarantee for us. */ - if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp)) + if (xfs_inode_can_hw_atomic_write(ip) || + xfs_inode_can_sw_atomic_write(ip)) return mp->m_sb.sb_blocksize; return 0; @@ -633,7 +634,7 @@ xfs_get_atomic_write_max( * write of exactly one single fsblock if the bdev will make that * guarantee for us. */ - if (!xfs_can_sw_atomic_write(mp)) { + if (!xfs_inode_can_sw_atomic_write(ip)) { if (xfs_inode_can_hw_atomic_write(ip)) return mp->m_sb.sb_blocksize; return 0; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c8c9b8d8309f..2aa37a4d2706 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -307,7 +307,6 @@ xfs_bulkstat( .breq = breq, }; struct xfs_trans *tp; - unsigned int iwalk_flags = 0; int error; if (breq->idmap != &nop_mnt_idmap) { @@ -328,10 +327,7 @@ xfs_bulkstat( * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - if (breq->flags & XFS_IBULK_SAME_AG) - iwalk_flags |= XFS_IWALK_SAME_AG; - - error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, + error = xfs_iwalk(breq->mp, tp, breq->startino, breq->iwalk_flags, xfs_bulkstat_iwalk, breq->icount, &bc); xfs_trans_cancel(tp); kfree(bc.buf); @@ -457,7 +453,7 @@ xfs_inumbers( * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, + error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->iwalk_flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index f10e8f8f2335..2d0612f14d6e 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -13,17 +13,15 @@ struct xfs_ibulk { xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ unsigned int ocount; /* number of records returned */ - unsigned int flags; /* see XFS_IBULK_FLAG_* */ + unsigned int flags; /* XFS_IBULK_FLAG_* */ + unsigned int iwalk_flags; /* XFS_IWALK_FLAG_* */ }; -/* Only iterate within the same AG as startino */ -#define XFS_IBULK_SAME_AG (1U << 0) - /* Fill out the bs_extents64 field if set. */ -#define XFS_IBULK_NREXT64 (1U << 1) +#define XFS_IBULK_NREXT64 (1U << 0) /* Signal that we can return metadata directories. 
*/ -#define XFS_IBULK_METADIR (1U << 2) +#define XFS_IBULK_METADIR (1U << 1) /* * Advance the user buffer pointer by one record of the given size. If the diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2133fbaf1766..dc32c5e34d81 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -779,6 +779,25 @@ xfs_set_max_atomic_write_opt( return -EINVAL; } + if (xfs_has_reflink(mp)) + goto set_limit; + + if (new_max_fsbs == 1) { + if (mp->m_ddev_targp->bt_awu_max || + (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_awu_max)) { + } else { + xfs_warn(mp, + "cannot support atomic writes of size %lluk with no reflink or HW support", + new_max_bytes >> 10); + return -EINVAL; + } + } else { + xfs_warn(mp, + "cannot support atomic writes of size %lluk with no reflink support", + new_max_bytes >> 10); + return -EINVAL; + } + set_limit: error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs); if (error) { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e1794e3e3156..ac344e42846c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -455,6 +455,7 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \ xfs_extlen_t len), \ TP_ARGS(oz, rgbno, len)) DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks); +DEFINE_ZONE_ALLOC_EVENT(xfs_zone_skip_blocks); DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks); TRACE_EVENT(xfs_zone_gc_select_victim, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ece374d622b3..575e7028f423 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -253,8 +253,8 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); tp = __xfs_trans_alloc(mp, flags); + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 33f7eee521a8..f8bd6d741755 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -166,10 +166,9 @@ xfs_open_zone_mark_full( static void xfs_zone_record_blocks( struct xfs_trans *tp, - xfs_fsblock_t fsbno, - xfs_filblks_t len, struct xfs_open_zone *oz, - bool used) + xfs_fsblock_t fsbno, + xfs_filblks_t len) { struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = oz->oz_rtg; @@ -179,18 +178,37 @@ xfs_zone_record_blocks( xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); - if (used) { - rmapip->i_used_blocks += len; - ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); - } else { - xfs_add_frextents(mp, len); - } + rmapip->i_used_blocks += len; + ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) xfs_open_zone_mark_full(oz); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); } +/* + * Called for blocks that have been written to disk, but not actually linked to + * an inode, which can happen when garbage collection races with user data + * writes to a file. 
+ */ +static void +xfs_zone_skip_blocks( + struct xfs_open_zone *oz, + xfs_filblks_t len) +{ + struct xfs_rtgroup *rtg = oz->oz_rtg; + + trace_xfs_zone_skip_blocks(oz, 0, len); + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + oz->oz_written += len; + if (oz->oz_written == rtg_blocks(rtg)) + xfs_open_zone_mark_full(oz); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + + xfs_add_frextents(rtg_mount(rtg), len); +} + static int xfs_zoned_map_extent( struct xfs_trans *tp, @@ -250,8 +268,7 @@ xfs_zoned_map_extent( } } - xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz, - true); + xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount); /* Map the new blocks into the data fork. */ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new); @@ -259,8 +276,7 @@ xfs_zoned_map_extent( skip: trace_xfs_reflink_cow_remap_skip(ip, new); - xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz, - false); + xfs_zone_skip_blocks(oz, new->br_blockcount); return 0; } |