diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 44 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 78 | ||||
-rw-r--r-- | fs/efivarfs/super.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 3 | ||||
-rw-r--r-- | fs/proc/generic.c | 38 | ||||
-rw-r--r-- | fs/smb/client/cifsfs.c | 14 | ||||
-rw-r--r-- | fs/smb/client/smb2inode.c | 7 | ||||
-rw-r--r-- | fs/xfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_remote.c | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_zone_alloc.c | 45 | ||||
-rw-r--r-- | fs/xfs/xfs_zone_space_resv.c | 6 |
14 files changed, 148 insertions, 110 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b99fb0273292..0387b9f43a52 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -248,7 +248,7 @@ struct btrfs_inode { u64 new_delalloc_bytes; /* * The offset of the last dir index key that was logged. - * This is used only for directories. + * This is used only for directories. Protected by 'log_mutex'. */ u64 last_dir_index_offset; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9e4aec7330cb..dd82dcc7b2b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6805,7 +6805,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct fscrypt_name fname; u64 index; int ret; - int drop_inode = 0; /* do not allow sys_link's with other subvols of the same device */ if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root)) @@ -6837,44 +6836,44 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, /* There are several dir indexes for this inode, clear the cache. */ BTRFS_I(inode)->dir_index = 0ULL; - inc_nlink(inode); inode_inc_iversion(inode); inode_set_ctime_current(inode); - ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); + if (ret) + goto fail; + /* Link added now we update the inode item with the new link count. */ + inc_nlink(inode); + ret = btrfs_update_inode(trans, BTRFS_I(inode)); if (ret) { - drop_inode = 1; - } else { - struct dentry *parent = dentry->d_parent; + btrfs_abort_transaction(trans, ret); + goto fail; + } - ret = btrfs_update_inode(trans, BTRFS_I(inode)); - if (ret) + if (inode->i_nlink == 1) { + /* + * If the new hard link count is 1, it's a file created with the + * open(2) O_TMPFILE flag. + */ + ret = btrfs_orphan_del(trans, BTRFS_I(inode)); + if (ret) { + btrfs_abort_transaction(trans, ret); goto fail; - if (inode->i_nlink == 1) { - /* - * If new hard link count is 1, it's a file created - * with open(2) O_TMPFILE flag. - */ - ret = btrfs_orphan_del(trans, BTRFS_I(inode)); - if (ret) - goto fail; } - d_instantiate(dentry, inode); - btrfs_log_new_name(trans, old_dentry, NULL, 0, parent); } + /* Grab reference for the new dentry passed to d_instantiate(). */ + ihold(inode); + d_instantiate(dentry, inode); + btrfs_log_new_name(trans, old_dentry, NULL, 0, dentry->d_parent); + fail: fscrypt_free_filename(&fname); if (trans) btrfs_end_transaction(trans); - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } btrfs_btree_balance_dirty(fs_info); return ret; } @@ -7830,6 +7829,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; + /* new_delalloc_bytes and last_dir_index_offset are in a union. */ ei->new_delalloc_bytes = 0; ei->defrag_bytes = 0; ei->disk_i_size = 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 69e11557fd13..7d5d90845ca9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } +static bool mark_inode_as_not_logged(const struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) +{ + bool ret = false; + + /* + * Do this only if ->logged_trans is still 0 to prevent races with + * concurrent logging as we may see the inode not logged when + * inode_logged() is called but it gets logged after inode_logged() did + * not find it in the log tree and we end up setting ->logged_trans to a + * value less than trans->transid after the concurrent logging task has + * set it to trans->transid. As a consequence, subsequent rename, unlink + * and link operations may end up not logging new names and removing old + * names from the log. + */ + spin_lock(&inode->lock); + if (inode->logged_trans == 0) + inode->logged_trans = trans->transid - 1; + else if (inode->logged_trans == trans->transid) + ret = true; + spin_unlock(&inode->lock); + + return ret; +} + /* * Check if an inode was logged in the current transaction. This correctly deals * with the case where the inode was logged but has a logged_trans of 0, which @@ -3357,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; - if (inode->logged_trans == trans->transid) + /* + * Quick lockless call, since once ->logged_trans is set to the current + * transaction, we never set it to a lower value anywhere else. + */ + if (data_race(inode->logged_trans) == trans->transid) return 1; /* - * If logged_trans is not 0, then we know the inode logged was not logged - * in this transaction, so we can return false right away. + * If logged_trans is not 0 and not trans->transid, then we know the + * inode was not logged in this transaction, so we can return false + * right away. We take the lock to avoid a race caused by load/store + * tearing with a concurrent btrfs_log_inode() call or a concurrent task + * in this function further below - an update to trans->transid can be + * teared into two 32 bits updates for example, in which case we could + * see a positive value that is not trans->transid and assume the inode + * was not logged when it was. */ - if (inode->logged_trans > 0) + spin_lock(&inode->lock); + if (inode->logged_trans == trans->transid) { + spin_unlock(&inode->lock); + return 1; + } else if (inode->logged_trans > 0) { + spin_unlock(&inode->lock); return 0; + } + spin_unlock(&inode->lock); /* * If no log tree was created for this root in this transaction, then @@ -3374,10 +3416,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * transaction's ID, to avoid the search below in a future call in case * a log tree gets created after this. */ - if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) { - inode->logged_trans = trans->transid - 1; - return 0; - } + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) + return mark_inode_as_not_logged(trans, inode); /* * We have a log tree and the inode's logged_trans is 0. We can't tell @@ -3431,8 +3471,7 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * Set logged_trans to a value greater than 0 and less then the * current transaction to avoid doing the search in future calls. */ - inode->logged_trans = trans->transid - 1; - return 0; + return mark_inode_as_not_logged(trans, inode); } /* @@ -3440,20 +3479,9 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * the current transacion's ID, to avoid future tree searches as long as * the inode is not evicted again. */ + spin_lock(&inode->lock); inode->logged_trans = trans->transid; - - /* - * If it's a directory, then we must set last_dir_index_offset to the - * maximum possible value, so that the next attempt to log the inode does - * not skip checking if dir index keys found in modified subvolume tree - * leaves have been logged before, otherwise it would result in attempts - * to insert duplicate dir index keys in the log tree. This must be done - * because last_dir_index_offset is an in-memory only field, not persisted - * in the inode item or any other on-disk structure, so its value is lost - * once the inode is evicted. - */ - if (S_ISDIR(inode->vfs_inode.i_mode)) - inode->last_dir_index_offset = (u64)-1; + spin_unlock(&inode->lock); return 1; } @@ -4045,7 +4073,7 @@ done: /* * If the inode was logged before and it was evicted, then its - * last_dir_index_offset is (u64)-1, so we don't the value of the last index + * last_dir_index_offset is 0, so we don't know the value of the last index * key offset. If that's the case, search for it and update the inode. This * is to avoid lookups in the log tree every time we try to insert a dir index * key from a leaf changed in the current transaction, and to allow us to always @@ -4061,7 +4089,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode, lockdep_assert_held(&inode->log_mutex); - if (inode->last_dir_index_offset != (u64)-1) + if (inode->last_dir_index_offset != 0) return 0; if (!ctx->logged_before) { diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index c4a139911356..4bb4002e3cdf 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -152,6 +152,10 @@ static int efivarfs_d_compare(const struct dentry *dentry, { int guid = len - EFI_VARIABLE_GUID_LEN; + /* Parallel lookups may produce a temporary invalid filename */ + if (guid <= 0) + return 1; + if (name->len != len) return 1; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 14bf440ea4df..6c4f78f473fb 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1281,6 +1281,9 @@ static void ocfs2_clear_inode(struct inode *inode) * the journal is flushed before journal shutdown. Thus it is safe to * have inodes get cleaned up after journal shutdown. */ + if (!osb->journal) + return; + jbd2_journal_release_jbd_inode(osb->journal->j_journal, &oi->ip_jinode); } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 76e800e38c8f..bd0c099cfdd2 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -367,6 +367,25 @@ static const struct inode_operations proc_dir_inode_operations = { .setattr = proc_notify_change, }; +static void pde_set_flags(struct proc_dir_entry *pde) +{ + const struct proc_ops *proc_ops = pde->proc_ops; + + if (!proc_ops) + return; + + if (proc_ops->proc_flags & PROC_ENTRY_PERMANENT) + pde->flags |= PROC_ENTRY_PERMANENT; + if (proc_ops->proc_read_iter) + pde->flags |= PROC_ENTRY_proc_read_iter; +#ifdef CONFIG_COMPAT + if (proc_ops->proc_compat_ioctl) + pde->flags |= PROC_ENTRY_proc_compat_ioctl; +#endif + if (proc_ops->proc_lseek) + pde->flags |= PROC_ENTRY_proc_lseek; +} + /* returns the registered entry, or frees dp and returns NULL on failure */ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp) @@ -374,6 +393,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, if (proc_alloc_inum(&dp->low_ino)) goto out_free_entry; + pde_set_flags(dp); + write_lock(&proc_subdir_lock); dp->parent = dir; if (pde_subdir_insert(dir, dp) == false) { @@ -561,20 +582,6 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, return p; } -static void pde_set_flags(struct proc_dir_entry *pde) -{ - if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) - pde->flags |= PROC_ENTRY_PERMANENT; - if (pde->proc_ops->proc_read_iter) - pde->flags |= PROC_ENTRY_proc_read_iter; -#ifdef CONFIG_COMPAT - if (pde->proc_ops->proc_compat_ioctl) - pde->flags |= PROC_ENTRY_proc_compat_ioctl; -#endif - if (pde->proc_ops->proc_lseek) - pde->flags |= PROC_ENTRY_proc_lseek; -} - struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops, void *data) @@ -585,7 +592,6 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, if (!p) return NULL; p->proc_ops = proc_ops; - pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); @@ -636,7 +642,6 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, p->proc_ops = &proc_seq_ops; p->seq_ops = ops; p->state_size = state_size; - pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_seq_private); @@ -667,7 +672,6 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, return NULL; p->proc_ops = &proc_single_ops; p->single_show = show; - pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_single_data); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 3bd85ab2deb1..e1848276bab4 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1358,6 +1358,20 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, truncate_setsize(target_inode, new_size); fscache_resize_cookie(cifs_inode_cookie(target_inode), new_size); + } else if (rc == -EOPNOTSUPP) { + /* + * copy_file_range syscall man page indicates EINVAL + * is returned e.g when "fd_in and fd_out refer to the + * same file and the source and target ranges overlap." + * Test generic/157 was what showed these cases where + * we need to remap EOPNOTSUPP to EINVAL + */ + if (off >= src_inode->i_size) { + rc = -EINVAL; + } else if (src_inode == target_inode) { + if (off + len > destoff) + rc = -EINVAL; + } } if (rc == 0 && new_size > target_cifsi->netfs.zero_point) target_cifsi->netfs.zero_point = new_size; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 2a0316c514e4..31c13fb5b85b 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -207,8 +207,10 @@ replay_again: server = cifs_pick_channel(ses); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); - if (vars == NULL) - return -ENOMEM; + if (vars == NULL) { + rc = -ENOMEM; + goto out; + } rqst = &vars->rqst[0]; rsp_iov = &vars->rsp_iov[0]; @@ -864,6 +866,7 @@ finished: smb2_should_replay(tcon, &retries, &cur_sleep)) goto replay_again; +out: if (cfile) cifsFileInfo_put(cfile); diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index ae0ca6858496..065953475cf5 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -105,6 +105,7 @@ config XFS_POSIX_ACL config XFS_RT bool "XFS Realtime subvolume support" depends on XFS_FS + default BLK_DEV_ZONED help If you say Y here you will be able to mount and use XFS filesystems which contain a realtime subvolume. The realtime subvolume is a diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 4c44ce1c8a64..bff3dc226f81 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -435,6 +435,13 @@ xfs_attr_rmtval_get( 0, &bp, &xfs_attr3_rmt_buf_ops); if (xfs_metadata_is_sick(error)) xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK); + /* + * ENODATA from disk implies a disk medium failure; + * ENODATA for xattrs means attribute not found, so + * disambiguate that here. + */ + if (error == -ENODATA) + error = -EIO; if (error) return error; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 17d9e6154f19..723a0643b838 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2833,6 +2833,12 @@ xfs_da_read_buf( &bp, ops); if (xfs_metadata_is_sick(error)) xfs_dirattr_mark_sick(dp, whichfork); + /* + * ENODATA from disk implies a disk medium failure; ENODATA for + * xattrs means attribute not found, so disambiguate that here. + */ + if (error == -ENODATA && whichfork == XFS_ATTR_FORK) + error = -EIO; if (error) goto out_free; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1ee4f835ac3c..a26f79815533 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -760,6 +760,9 @@ xfs_vm_swap_activate( { struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + if (xfs_is_zoned_inode(ip)) + return -EINVAL; + /* * Swap file activation can race against concurrent shared extent * removal in files that have been cloned. If this happens, diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index f8bd6d741755..f28214c28ab5 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -374,44 +374,6 @@ xfs_zone_free_blocks( return 0; } -/* - * Check if the zone containing the data just before the offset we are - * writing to is still open and has space. - */ -static struct xfs_open_zone * -xfs_last_used_zone( - struct iomap_ioend *ioend) -{ - struct xfs_inode *ip = XFS_I(ioend->io_inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSB(mp, ioend->io_offset); - struct xfs_rtgroup *rtg = NULL; - struct xfs_open_zone *oz = NULL; - struct xfs_iext_cursor icur; - struct xfs_bmbt_irec got; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (!xfs_iext_lookup_extent_before(ip, &ip->i_df, &offset_fsb, - &icur, &got)) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return NULL; - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - rtg = xfs_rtgroup_grab(mp, xfs_rtb_to_rgno(mp, got.br_startblock)); - if (!rtg) - return NULL; - - xfs_ilock(rtg_rmap(rtg), XFS_ILOCK_SHARED); - oz = READ_ONCE(rtg->rtg_open_zone); - if (oz && (oz->oz_is_gc || !atomic_inc_not_zero(&oz->oz_ref))) - oz = NULL; - xfs_iunlock(rtg_rmap(rtg), XFS_ILOCK_SHARED); - - xfs_rtgroup_rele(rtg); - return oz; -} - static struct xfs_group * xfs_find_free_zone( struct xfs_mount *mp, @@ -918,12 +880,9 @@ xfs_zone_alloc_and_submit( goto out_error; /* - * If we don't have a cached zone in this write context, see if the - * last extent before the one we are writing to points to an active - * zone. If so, just continue writing to it. + * If we don't have a locally cached zone in this write context, see if + * the inode is still associated with a zone and use that if so. */ - if (!*oz && ioend->io_offset) - *oz = xfs_last_used_zone(ioend); if (!*oz) *oz = xfs_cached_zone(mp, ip); diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c index 1313c55b8cbe..9cd38716fd25 100644 --- a/fs/xfs/xfs_zone_space_resv.c +++ b/fs/xfs/xfs_zone_space_resv.c @@ -10,6 +10,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_rtbitmap.h" +#include "xfs_icache.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" #include "xfs_zones.h" @@ -230,6 +231,11 @@ xfs_zoned_space_reserve( error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb, flags & XFS_ZR_RESERVED); + if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) { + xfs_inodegc_flush(mp); + error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb, + flags & XFS_ZR_RESERVED); + } if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1) error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags); if (error) |