From b091f7fede97cc64f7aaad3eeb37965aebee3082 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Jun 2020 11:31:59 -0400 Subject: btrfs: use kfree() in btrfs_ioctl_get_subvol_info() In btrfs_ioctl_get_subvol_info(), there is a classic case where kzalloc() was incorrectly paired with kzfree(). According to David Sterba, there isn't any sensitive information in the subvol_info that needs to be cleared before freeing. So kzfree() isn't really needed, use kfree() instead. Signed-off-by: Waiman Long Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 168deb8ef68a..e8f7c5f00894 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2692,7 +2692,7 @@ out: btrfs_put_root(root); out_free: btrfs_free_path(path); - kzfree(subvol_info); + kfree(subvol_info); return ret; } -- cgit From c3504372699bff6daeda207b4e30256c39f584c1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:03 +0300 Subject: btrfs: make btrfs_lookup_ordered_extent take btrfs_inode It doesn't use the generic vfs inode for anything use btrfs_inode directly. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e8f7c5f00894..b3e4c632d80c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1265,7 +1265,7 @@ again: while (1) { lock_extent_bits(tree, page_start, page_end, &cached_state); - ordered = btrfs_lookup_ordered_extent(inode, + ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), page_start); unlock_extent_cached(tree, page_start, page_end, &cached_state); -- cgit From 86d52921a2ba51a78e5bfb71c75aedcbd9e61a5c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:40 +0300 Subject: btrfs: make btrfs_delalloc_release_space take btrfs_inode It needs btrfs_inode so take it as a parameter directly. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b3e4c632d80c..90083fb04928 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1333,7 +1333,7 @@ again: spin_lock(&BTRFS_I(inode)->lock); btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); spin_unlock(&BTRFS_I(inode)->lock); - btrfs_delalloc_release_space(inode, data_reserved, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, start_index << PAGE_SHIFT, (page_cnt - i_done) << PAGE_SHIFT, true); } @@ -1361,7 +1361,7 @@ out: unlock_page(pages[i]); put_page(pages[i]); } - btrfs_delalloc_release_space(inode, data_reserved, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, start_index << PAGE_SHIFT, page_cnt << PAGE_SHIFT, true); btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); -- cgit From e5b7231e2009967b63a39ce0ec6a022307616b82 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:42 +0300 Subject: btrfs: make btrfs_delalloc_reserve_space take btrfs_inode All of its children take btrfs_inode so bubble up this requirement to btrfs_delalloc_reserve_space's interface and stop calling BTRFS_I internally. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 90083fb04928..4adfdfa28e53 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1243,7 +1243,7 @@ static int cluster_pages_for_defrag(struct inode *inode, page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, + ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, start_index << PAGE_SHIFT, page_cnt << PAGE_SHIFT); if (ret) -- cgit From 2dfb1e43f57dd3aeaa66f7cf05d068db2d4c8788 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Jun 2020 10:17:36 +0800 Subject: btrfs: preallocate anon block device at first phase of snapshot creation [BUG] When the anonymous block device pool is exhausted, subvolume/snapshot creation fails with EMFILE (Too many files open). This has been reported by a user. The allocation happens in the second phase during transaction commit where it's only way out is to abort the transaction BTRFS: Transaction aborted (error -24) WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs] RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs] Call Trace: create_pending_snapshots+0x82/0xa0 [btrfs] btrfs_commit_transaction+0x275/0x8c0 [btrfs] btrfs_mksubvol+0x4b9/0x500 [btrfs] btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs] btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs] btrfs_ioctl+0x11a4/0x2da0 [btrfs] do_vfs_ioctl+0xa9/0x640 ksys_ioctl+0x67/0x90 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 33f2f83f3d5250e9 ]--- BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown BTRFS info (device sda1): forced readonly BTRFS warning (device sda1): Skipping commit of aborted transaction. BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown [CAUSE] When the global anonymous block device pool is exhausted, the following call chain will fail, and lead to transaction abort: btrfs_ioctl_snap_create_v2() |- btrfs_ioctl_snap_create_transid() |- btrfs_mksubvol() |- btrfs_commit_transaction() |- create_pending_snapshot() |- btrfs_get_fs_root() |- btrfs_init_fs_root() |- get_anon_bdev() [FIX] Although we can't enlarge the anonymous block device pool, at least we can preallocate anon_dev for subvolume/snapshot in the first phase, outside of transaction context and exactly at the moment the user calls the creation ioctl. Reported-by: Greed Rong Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/ CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4adfdfa28e53..ab34179d7cbc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -566,6 +566,7 @@ static noinline int create_subvol(struct inode *dir, struct inode *inode; int ret; int err; + dev_t anon_dev = 0; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; @@ -578,6 +579,10 @@ static noinline int create_subvol(struct inode *dir, if (ret) goto fail_free; + ret = get_anon_bdev(&anon_dev); + if (ret < 0) + goto fail_free; + /* * Don't create subvolume whose level is not zero. Or qgroup will be * screwed up since it assumes subvolume qgroup's level to be 0. @@ -660,12 +665,15 @@ static noinline int create_subvol(struct inode *dir, goto fail; key.offset = (u64)-1; - new_root = btrfs_get_fs_root(fs_info, objectid, true); + new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); if (IS_ERR(new_root)) { + free_anon_bdev(anon_dev); ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); goto fail; } + /* Freeing will be done in btrfs_put_root() of new_root */ + anon_dev = 0; btrfs_record_root_in_trans(trans, new_root); @@ -735,6 +743,8 @@ fail: return ret; fail_free: + if (anon_dev) + free_anon_bdev(anon_dev); kfree(root_item); return ret; } @@ -762,6 +772,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (!pending_snapshot) return -ENOMEM; + ret = get_anon_bdev(&pending_snapshot->anon_dev); + if (ret < 0) + goto free_pending; pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), GFP_KERNEL); pending_snapshot->path = btrfs_alloc_path(); @@ -823,10 +836,16 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, d_instantiate(dentry, inode); ret = 0; + pending_snapshot->anon_dev = 0; fail: + /* Prevent double freeing of anon_dev */ + if (ret && pending_snapshot->snap) + pending_snapshot->snap->anon_dev = 0; btrfs_put_root(pending_snapshot->snap); btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv); free_pending: + if (pending_snapshot->anon_dev) + free_anon_bdev(pending_snapshot->anon_dev); kfree(pending_snapshot->root_item); btrfs_free_path(pending_snapshot->path); kfree(pending_snapshot); -- cgit From 137c541821a83debb63b3fa8abdd1cbc41bdf3a1 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 Jul 2020 21:28:58 +0900 Subject: btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl With the recent addition of filesystem checksum types other than CRC32c, it is not anymore hard-coded which checksum type a btrfs filesystem uses. Up to now there is no good way to read the filesystem checksum, apart from reading the filesystem UUID and then query sysfs for the checksum type. Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl command which usually is used to query filesystem features. Also add a flags member indicating that the kernel responded with a set csum_type and csum_size field. For compatibility reasons, only return the csum_type and csum_size if the BTRFS_FS_INFO_FLAG_CSUM_INFO flag was passed to the kernel. Also clear any unknown flags so we don't pass false positives to user-space newer than the kernel. To simplify further additions to the ioctl, also switch the padding to a u8 array. Pahole was used to verify the result of this switch: The csum members are added before flags, which might look odd, but this is to keep the alignment requirements and not to introduce holes in the structure. $ pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko struct btrfs_ioctl_fs_info_args { __u64 max_id; /* 0 8 */ __u64 num_devices; /* 8 8 */ __u8 fsid[16]; /* 16 16 */ __u32 nodesize; /* 32 4 */ __u32 sectorsize; /* 36 4 */ __u32 clone_alignment; /* 40 4 */ __u16 csum_type; /* 44 2 */ __u16 csum_size; /* 46 2 */ __u64 flags; /* 48 8 */ __u8 reserved[968]; /* 56 968 */ /* size: 1024, cachelines: 16, members: 10 */ }; Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms") Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm") CC: stable@vger.kernel.org # 5.5+ Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab34179d7cbc..2854f4a40787 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3217,11 +3217,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_fs_info_args *fi_args; struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + u64 flags_in; int ret = 0; - fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); - if (!fi_args) - return -ENOMEM; + fi_args = memdup_user(arg, sizeof(*fi_args)); + if (IS_ERR(fi_args)) + return PTR_ERR(fi_args); + + flags_in = fi_args->flags; + memset(fi_args, 0, sizeof(*fi_args)); rcu_read_lock(); fi_args->num_devices = fs_devices->num_devices; @@ -3237,6 +3241,12 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, fi_args->sectorsize = fs_info->sectorsize; fi_args->clone_alignment = fs_info->sectorsize; + if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) { + fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy); + fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy); + fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO; + } + if (copy_to_user(arg, fi_args, sizeof(*fi_args))) ret = -EFAULT; -- cgit From 0fb408a558aadbaa58beb75b02c95741e1fbb514 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 Jul 2020 21:28:59 +0900 Subject: btrfs: add filesystem generation to FS_INFO ioctl Add retrieval of the filesystem's generation to the fsinfo ioctl. This is driven by setting the BTRFS_FS_INFO_FLAG_GENERATION flag in btrfs_ioctl_fs_info_args::flags. Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2854f4a40787..55dd20d0f9cb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3247,6 +3247,11 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO; } + if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) { + fi_args->generation = fs_info->generation; + fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION; + } + if (copy_to_user(arg, fi_args, sizeof(*fi_args))) ret = -EFAULT; -- cgit From 49bac897683340457a0ab1f76b924a1220bdb604 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 Jul 2020 21:29:00 +0900 Subject: btrfs: add metadata_uuid to FS_INFO ioctl Add retrieval of the filesystem's metadata UUID to the fsinfo ioctl. This is driven by setting the BTRFS_FS_INFO_FLAG_METADATA_UUID flag in btrfs_ioctl_fs_info_args::flags. Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 55dd20d0f9cb..b4ddf51ae377 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3252,6 +3252,12 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION; } + if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) { + memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid, + sizeof(fi_args->metadata_uuid)); + fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID; + } + if (copy_to_user(arg, fi_args, sizeof(*fi_args))) ret = -EFAULT; -- cgit From f37c563bab4297024c300b05c8f48430e323809d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 10 Jul 2020 09:49:56 +0200 Subject: btrfs: add missing check for nocow and compression inode flags User Forza reported on IRC that some invalid combinations of file attributes are accepted by chattr. The NODATACOW and compression file flags/attributes are mutually exclusive, but they could be set by 'chattr +c +C' on an empty file. The nodatacow will be in effect because it's checked first in btrfs_run_delalloc_range. Extend the flag validation to catch the following cases: - input flags are conflicting - old and new flags are conflicting - initialize the local variable with inode flags after inode ls locked Inode attributes take precedence over mount options and are an independent setting. Nocompress would be a no-op with nodatacow, but we don't want to mix any compression-related options with nodatacow. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b4ddf51ae377..bd3511c5ca81 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg) return 0; } -/* Check if @flags are a supported and valid set of FS_*_FL flags */ -static int check_fsflags(unsigned int flags) +/* + * Check if @flags are a supported and valid set of FS_*_FL flags and that + * the old and new flags are not conflicting + */ +static int check_fsflags(unsigned int old_flags, unsigned int flags) { if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ @@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags) FS_NOCOW_FL)) return -EOPNOTSUPP; + /* COMPR and NOCOMP on new/old are valid */ if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) return -EINVAL; + if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL)) + return -EINVAL; + + /* NOCOW and compression options are mutually exclusive */ + if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL))) + return -EINVAL; + if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL))) + return -EINVAL; + return 0; } @@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) unsigned int fsflags, old_fsflags; int ret; const char *comp = NULL; - u32 binode_flags = binode->flags; + u32 binode_flags; if (!inode_owner_or_capable(inode)) return -EPERM; @@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (copy_from_user(&fsflags, arg, sizeof(fsflags))) return -EFAULT; - ret = check_fsflags(fsflags); - if (ret) - return ret; - ret = mnt_want_write_file(file); if (ret) return ret; inode_lock(inode); - fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); if (ret) goto out_unlock; + ret = check_fsflags(old_fsflags, fsflags); + if (ret) + goto out_unlock; + + binode_flags = binode->flags; if (fsflags & FS_SYNC_FL) binode_flags |= BTRFS_INODE_SYNC; else -- cgit