summaryrefslogtreecommitdiff
path: root/fs/btrfs/ioctl.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r--fs/btrfs/ioctl.c1180
1 files changed, 886 insertions, 294 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 55f3ba6a831c..913acef3f0a9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -29,6 +29,7 @@
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#include <linux/sched/xacct.h>
+#include <linux/io_uring/cmd.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -117,8 +118,8 @@ struct btrfs_ioctl_encoded_io_args_32 {
#endif
/* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
- unsigned int flags)
+static unsigned int btrfs_mask_fsflags_for_type(const struct inode *inode,
+ unsigned int flags)
{
if (S_ISDIR(inode->i_mode))
return flags;
@@ -132,11 +133,11 @@ static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
* ioctl.
*/
-static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
+static unsigned int btrfs_inode_flags_to_fsflags(const struct btrfs_inode *inode)
{
unsigned int iflags = 0;
- u32 flags = binode->flags;
- u32 ro_flags = binode->ro_flags;
+ u32 flags = inode->flags;
+ u32 ro_flags = inode->ro_flags;
if (flags & BTRFS_INODE_SYNC)
iflags |= FS_SYNC_FL;
@@ -166,25 +167,24 @@ static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
/*
* Update inode->i_flags based on the btrfs internal flags.
*/
-void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
+void btrfs_sync_inode_flags_to_i_flags(struct btrfs_inode *inode)
{
- struct btrfs_inode *binode = BTRFS_I(inode);
unsigned int new_fl = 0;
- if (binode->flags & BTRFS_INODE_SYNC)
+ if (inode->flags & BTRFS_INODE_SYNC)
new_fl |= S_SYNC;
- if (binode->flags & BTRFS_INODE_IMMUTABLE)
+ if (inode->flags & BTRFS_INODE_IMMUTABLE)
new_fl |= S_IMMUTABLE;
- if (binode->flags & BTRFS_INODE_APPEND)
+ if (inode->flags & BTRFS_INODE_APPEND)
new_fl |= S_APPEND;
- if (binode->flags & BTRFS_INODE_NOATIME)
+ if (inode->flags & BTRFS_INODE_NOATIME)
new_fl |= S_NOATIME;
- if (binode->flags & BTRFS_INODE_DIRSYNC)
+ if (inode->flags & BTRFS_INODE_DIRSYNC)
new_fl |= S_DIRSYNC;
- if (binode->ro_flags & BTRFS_INODE_RO_VERITY)
+ if (inode->ro_flags & BTRFS_INODE_RO_VERITY)
new_fl |= S_VERITY;
- set_mask_bits(&inode->i_flags,
+ set_mask_bits(&inode->vfs_inode.i_flags,
S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC |
S_VERITY, new_fl);
}
@@ -218,7 +218,7 @@ static int check_fsflags(unsigned int old_flags, unsigned int flags)
return 0;
}
-static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
+static int check_fsflags_compatible(const struct btrfs_fs_info *fs_info,
unsigned int flags)
{
if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL))
@@ -247,24 +247,23 @@ static int btrfs_check_ioctl_vol_args2_subvol_name(const struct btrfs_ioctl_vol_
*/
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
- struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));
+ const struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
- fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode));
+ fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(inode));
return 0;
}
int btrfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
- struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct btrfs_inode *binode = BTRFS_I(inode);
- struct btrfs_root *root = binode->root;
+ struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
- u32 binode_flags;
+ u32 inode_flags;
if (btrfs_root_readonly(root))
return -EROFS;
@@ -272,8 +271,8 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
if (fileattr_has_fsx(fa))
return -EOPNOTSUPP;
- fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags);
- old_fsflags = btrfs_inode_flags_to_fsflags(binode);
+ fsflags = btrfs_mask_fsflags_for_type(&inode->vfs_inode, fa->flags);
+ old_fsflags = btrfs_inode_flags_to_fsflags(inode);
ret = check_fsflags(old_fsflags, fsflags);
if (ret)
return ret;
@@ -282,27 +281,27 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
if (ret)
return ret;
- binode_flags = binode->flags;
+ inode_flags = inode->flags;
if (fsflags & FS_SYNC_FL)
- binode_flags |= BTRFS_INODE_SYNC;
+ inode_flags |= BTRFS_INODE_SYNC;
else
- binode_flags &= ~BTRFS_INODE_SYNC;
+ inode_flags &= ~BTRFS_INODE_SYNC;
if (fsflags & FS_IMMUTABLE_FL)
- binode_flags |= BTRFS_INODE_IMMUTABLE;
+ inode_flags |= BTRFS_INODE_IMMUTABLE;
else
- binode_flags &= ~BTRFS_INODE_IMMUTABLE;
+ inode_flags &= ~BTRFS_INODE_IMMUTABLE;
if (fsflags & FS_APPEND_FL)
- binode_flags |= BTRFS_INODE_APPEND;
+ inode_flags |= BTRFS_INODE_APPEND;
else
- binode_flags &= ~BTRFS_INODE_APPEND;
+ inode_flags &= ~BTRFS_INODE_APPEND;
if (fsflags & FS_NODUMP_FL)
- binode_flags |= BTRFS_INODE_NODUMP;
+ inode_flags |= BTRFS_INODE_NODUMP;
else
- binode_flags &= ~BTRFS_INODE_NODUMP;
+ inode_flags &= ~BTRFS_INODE_NODUMP;
if (fsflags & FS_NOATIME_FL)
- binode_flags |= BTRFS_INODE_NOATIME;
+ inode_flags |= BTRFS_INODE_NOATIME;
else
- binode_flags &= ~BTRFS_INODE_NOATIME;
+ inode_flags &= ~BTRFS_INODE_NOATIME;
/* If coming from FS_IOC_FSSETXATTR then skip unconverted flags */
if (!fa->flags_valid) {
@@ -314,32 +313,32 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
}
if (fsflags & FS_DIRSYNC_FL)
- binode_flags |= BTRFS_INODE_DIRSYNC;
+ inode_flags |= BTRFS_INODE_DIRSYNC;
else
- binode_flags &= ~BTRFS_INODE_DIRSYNC;
+ inode_flags &= ~BTRFS_INODE_DIRSYNC;
if (fsflags & FS_NOCOW_FL) {
- if (S_ISREG(inode->i_mode)) {
+ if (S_ISREG(inode->vfs_inode.i_mode)) {
/*
* It's safe to turn csums off here, no extents exist.
* Otherwise we want the flag to reflect the real COW
* status of the file and will not set it.
*/
- if (inode->i_size == 0)
- binode_flags |= BTRFS_INODE_NODATACOW |
- BTRFS_INODE_NODATASUM;
+ if (inode->vfs_inode.i_size == 0)
+ inode_flags |= BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM;
} else {
- binode_flags |= BTRFS_INODE_NODATACOW;
+ inode_flags |= BTRFS_INODE_NODATACOW;
}
} else {
/*
* Revert back under same assumptions as above
*/
- if (S_ISREG(inode->i_mode)) {
- if (inode->i_size == 0)
- binode_flags &= ~(BTRFS_INODE_NODATACOW |
- BTRFS_INODE_NODATASUM);
+ if (S_ISREG(inode->vfs_inode.i_mode)) {
+ if (inode->vfs_inode.i_size == 0)
+ inode_flags &= ~(BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM);
} else {
- binode_flags &= ~BTRFS_INODE_NODATACOW;
+ inode_flags &= ~BTRFS_INODE_NODATACOW;
}
}
@@ -349,21 +348,21 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
* things smaller.
*/
if (fsflags & FS_NOCOMP_FL) {
- binode_flags &= ~BTRFS_INODE_COMPRESS;
- binode_flags |= BTRFS_INODE_NOCOMPRESS;
+ inode_flags &= ~BTRFS_INODE_COMPRESS;
+ inode_flags |= BTRFS_INODE_NOCOMPRESS;
} else if (fsflags & FS_COMPR_FL) {
- if (IS_SWAPFILE(inode))
+ if (IS_SWAPFILE(&inode->vfs_inode))
return -ETXTBSY;
- binode_flags |= BTRFS_INODE_COMPRESS;
- binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
+ inode_flags |= BTRFS_INODE_COMPRESS;
+ inode_flags &= ~BTRFS_INODE_NOCOMPRESS;
comp = btrfs_compress_type2str(fs_info->compress_type);
if (!comp || comp[0] == 0)
comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
} else {
- binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+ inode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
/*
@@ -375,15 +374,14 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
return PTR_ERR(trans);
if (comp) {
- ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
- strlen(comp), 0);
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression",
+ comp, strlen(comp), 0);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
} else {
- ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
- 0, 0);
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0);
if (ret && ret != -ENODATA) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
@@ -391,98 +389,19 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
}
update_flags:
- binode->flags = binode_flags;
+ inode->flags = inode_flags;
+ btrfs_update_inode_mapping_flags(inode);
btrfs_sync_inode_flags_to_i_flags(inode);
- inode_inc_iversion(inode);
- inode_set_ctime_current(inode);
- ret = btrfs_update_inode(trans, BTRFS_I(inode));
+ inode_inc_iversion(&inode->vfs_inode);
+ inode_set_ctime_current(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, inode);
out_end_trans:
btrfs_end_transaction(trans);
return ret;
}
-/*
- * Start exclusive operation @type, return true on success
- */
-bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
- enum btrfs_exclusive_operation type)
-{
- bool ret = false;
-
- spin_lock(&fs_info->super_lock);
- if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
- fs_info->exclusive_operation = type;
- ret = true;
- }
- spin_unlock(&fs_info->super_lock);
-
- return ret;
-}
-
-/*
- * Conditionally allow to enter the exclusive operation in case it's compatible
- * with the running one. This must be paired with btrfs_exclop_start_unlock and
- * btrfs_exclop_finish.
- *
- * Compatibility:
- * - the same type is already running
- * - when trying to add a device and balance has been paused
- * - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
- * must check the condition first that would allow none -> @type
- */
-bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
- enum btrfs_exclusive_operation type)
-{
- spin_lock(&fs_info->super_lock);
- if (fs_info->exclusive_operation == type ||
- (fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
- type == BTRFS_EXCLOP_DEV_ADD))
- return true;
-
- spin_unlock(&fs_info->super_lock);
- return false;
-}
-
-void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
-{
- spin_unlock(&fs_info->super_lock);
-}
-
-void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
-{
- spin_lock(&fs_info->super_lock);
- WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
- spin_unlock(&fs_info->super_lock);
- sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
-}
-
-void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
- enum btrfs_exclusive_operation op)
-{
- switch (op) {
- case BTRFS_EXCLOP_BALANCE_PAUSED:
- spin_lock(&fs_info->super_lock);
- ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
- fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
- spin_unlock(&fs_info->super_lock);
- break;
- case BTRFS_EXCLOP_BALANCE:
- spin_lock(&fs_info->super_lock);
- ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
- fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
- spin_unlock(&fs_info->super_lock);
- break;
- default:
- btrfs_warn(fs_info,
- "invalid exclop balance operation %d requested", op);
- }
-}
-
-static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
+static int btrfs_ioctl_getversion(const struct inode *inode, int __user *arg)
{
return put_user(inode->i_generation, arg);
}
@@ -543,31 +462,18 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
range.minlen = max(range.minlen, minlen);
ret = btrfs_trim_fs(fs_info, &range);
- if (ret < 0)
- return ret;
if (copy_to_user(arg, &range, sizeof(range)))
return -EFAULT;
- return 0;
-}
-
-int __pure btrfs_is_empty_uuid(u8 *uuid)
-{
- int i;
-
- for (i = 0; i < BTRFS_UUID_SIZE; i++) {
- if (uuid[i])
- return 0;
- }
- return 1;
+ return ret;
}
/*
* Calculate the number of transaction items to reserve for creating a subvolume
* or snapshot, not including the inode, directory entries, or parent directory.
*/
-static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit)
+static unsigned int create_subvol_num_items(const struct btrfs_qgroup_inherit *inherit)
{
/*
* 1 to add root block
@@ -658,17 +564,12 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
ret = PTR_ERR(trans);
goto out_release_rsv;
}
- ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
- if (ret)
- goto out;
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
- /* Tree log can't currently deal with an inode which is a new root. */
- btrfs_set_log_full_commit(trans);
- ret = btrfs_qgroup_inherit(trans, 0, objectid, root->root_key.objectid, inherit);
+ ret = btrfs_qgroup_inherit(trans, 0, objectid, btrfs_root_id(root), inherit);
if (ret)
goto out;
@@ -714,11 +615,13 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_set_root_dirid(root_item, BTRFS_FIRST_FREE_OBJECTID);
key.objectid = objectid;
- key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = 0;
ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
root_item);
if (ret) {
+ int ret2;
+
/*
* Since we don't abort the transaction in this case, free the
* tree block so that we don't leak space and leave the
@@ -729,7 +632,9 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_tree_lock(leaf);
btrfs_clear_buffer_dirty(trans, leaf);
btrfs_tree_unlock(leaf);
- btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
+ ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
+ if (ret2 < 0)
+ btrfs_abort_transaction(trans, ret2);
free_extent_buffer(leaf);
goto out;
}
@@ -767,6 +672,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
goto out;
}
+ btrfs_record_new_subvolume(trans, BTRFS_I(dir));
+
d_instantiate_new(dentry, new_inode_args.inode);
new_inode_args.inode = NULL;
@@ -854,7 +761,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
pending_snapshot->readonly = readonly;
- pending_snapshot->dir = dir;
+ pending_snapshot->dir = BTRFS_I(dir);
pending_snapshot->inherit = inherit;
trans = btrfs_start_transaction(root, 0);
@@ -969,7 +876,7 @@ static int btrfs_may_delete(struct mnt_idmap *idmap,
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct mnt_idmap *idmap,
- struct inode *dir, struct dentry *child)
+ struct inode *dir, const struct dentry *child)
{
if (d_really_is_positive(child))
return -EEXIST;
@@ -1002,7 +909,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (error == -EINTR)
return error;
- dentry = lookup_one(idmap, name, parent->dentry, namelen);
+ dentry = lookup_one(idmap, &QSTR_LEN(name, namelen), parent->dentry);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
@@ -1049,7 +956,6 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
struct btrfs_qgroup_inherit *inherit)
{
int ret;
- bool snapshot_force_cow = false;
/*
* Force new buffered writes to reserve space even when NOCOW is
@@ -1068,15 +974,13 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
* creation.
*/
atomic_inc(&root->snapshot_force_cow);
- snapshot_force_cow = true;
- btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+ btrfs_wait_ordered_extents(root, U64_MAX, NULL);
ret = btrfs_mksubvol(parent, idmap, name, namelen,
root, readonly, inherit);
+ atomic_dec(&root->snapshot_force_cow);
out:
- if (snapshot_force_cow)
- atomic_dec(&root->snapshot_force_cow);
btrfs_drew_read_unlock(&root->snapshot_lock);
return ret;
}
@@ -1127,17 +1031,14 @@ static noinline int btrfs_ioctl_resize(struct file *file,
void __user *arg)
{
BTRFS_DEV_LOOKUP_ARGS(args);
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 new_size;
u64 old_size;
u64 devid = 1;
- struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_trans_handle *trans;
struct btrfs_device *device = NULL;
char *sizestr;
- char *retptr;
char *devstr = NULL;
int ret = 0;
int mod = 0;
@@ -1205,6 +1106,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (!strcmp(sizestr, "max"))
new_size = bdev_nr_bytes(device->bdev);
else {
+ char *retptr;
+
if (sizestr[0] == '-') {
mod = -1;
sizestr++;
@@ -1252,6 +1155,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
new_size = round_down(new_size, fs_info->sectorsize);
if (new_size > old_size) {
+ struct btrfs_trans_handle *trans;
+
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -1309,14 +1214,14 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
ret = btrfs_mksubvol(&file->f_path, idmap, name,
namelen, NULL, readonly, inherit);
} else {
- struct fd src = fdget(fd);
+ CLASS(fd, src)(fd);
struct inode *src_inode;
- if (!src.file) {
+ if (fd_empty(src)) {
ret = -EINVAL;
goto out_drop_write;
}
- src_inode = file_inode(src.file);
+ src_inode = file_inode(fd_file(src));
if (src_inode->i_sb != file_inode(file)->i_sb) {
btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
@@ -1342,7 +1247,6 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
BTRFS_I(src_inode)->root,
readonly, inherit);
}
- fdput(src);
}
out_drop_write:
mnt_drop_write_file(file);
@@ -1431,15 +1335,15 @@ free_args:
return ret;
}
-static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
+static noinline int btrfs_ioctl_subvol_getflags(struct btrfs_inode *inode,
void __user *arg)
{
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
u64 flags = 0;
- if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
+ if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
return -EINVAL;
down_read(&fs_info->subvol_sem);
@@ -1510,7 +1414,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
spin_unlock(&root->root_item_lock);
btrfs_warn(fs_info,
"Attempt to set subvolume %llu read-write during send",
- root->root_key.objectid);
+ btrfs_root_id(root));
ret = -EPERM;
goto out_drop_sem;
}
@@ -1542,8 +1446,8 @@ out:
return ret;
}
-static noinline int key_in_sk(struct btrfs_key *key,
- struct btrfs_ioctl_search_key *sk)
+static noinline bool key_in_sk(const struct btrfs_key *key,
+ const struct btrfs_ioctl_search_key *sk)
{
struct btrfs_key test;
int ret;
@@ -1554,7 +1458,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
ret = btrfs_comp_cpu_keys(key, &test);
if (ret < 0)
- return 0;
+ return false;
test.objectid = sk->max_objectid;
test.type = sk->max_type;
@@ -1562,13 +1466,13 @@ static noinline int key_in_sk(struct btrfs_key *key,
ret = btrfs_comp_cpu_keys(key, &test);
if (ret > 0)
- return 0;
- return 1;
+ return false;
+ return true;
}
static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
- struct btrfs_ioctl_search_key *sk,
+ const struct btrfs_ioctl_search_key *sk,
u64 *buf_size,
char __user *ubuf,
unsigned long *sk_offset,
@@ -1625,8 +1529,8 @@ static noinline int copy_to_sk(struct btrfs_path *path,
}
sh.objectid = key->objectid;
- sh.offset = key->offset;
sh.type = key->type;
+ sh.offset = key->offset;
sh.len = item_len;
sh.transid = found_transid;
@@ -1699,13 +1603,12 @@ out:
return ret;
}
-static noinline int search_ioctl(struct inode *inode,
+static noinline int search_ioctl(struct btrfs_root *root,
struct btrfs_ioctl_search_key *sk,
u64 *buf_size,
char __user *ubuf)
{
- struct btrfs_fs_info *info = inode_to_fs_info(inode);
- struct btrfs_root *root;
+ struct btrfs_fs_info *info = root->fs_info;
struct btrfs_key key;
struct btrfs_path *path;
int ret;
@@ -1722,9 +1625,10 @@ static noinline int search_ioctl(struct inode *inode,
return -ENOMEM;
if (sk->tree_id == 0) {
- /* search the root of the inode that was passed */
- root = btrfs_grab_root(BTRFS_I(inode)->root);
+ /* Search the root that we got passed. */
+ root = btrfs_grab_root(root);
} else {
+ /* Look up the root from the arguments. */
root = btrfs_get_fs_root(info, sk->tree_id, true);
if (IS_ERR(root)) {
btrfs_free_path(path);
@@ -1737,21 +1641,19 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
- ret = -EFAULT;
/*
* Ensure that the whole user buffer is faulted in at sub-page
* granularity, otherwise the loop may live-lock.
*/
- if (fault_in_subpage_writeable(ubuf + sk_offset,
- *buf_size - sk_offset))
+ if (fault_in_subpage_writeable(ubuf + sk_offset, *buf_size - sk_offset)) {
+ ret = -EFAULT;
break;
+ }
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
- if (ret != 0) {
- if (ret > 0)
- ret = 0;
- goto err;
- }
+ if (ret)
+ break;
+
ret = copy_to_sk(path, &key, sk, buf_size, ubuf,
&sk_offset, &num_found);
btrfs_release_path(path);
@@ -1759,16 +1661,17 @@ static noinline int search_ioctl(struct inode *inode,
break;
}
+ /* Normalize return values from btrfs_search_forward() and copy_to_sk(). */
if (ret > 0)
ret = 0;
-err:
+
sk->nr_items = num_found;
btrfs_put_root(root);
btrfs_free_path(path);
return ret;
}
-static noinline int btrfs_ioctl_tree_search(struct inode *inode,
+static noinline int btrfs_ioctl_tree_search(struct btrfs_root *root,
void __user *argp)
{
struct btrfs_ioctl_search_args __user *uargs = argp;
@@ -1784,7 +1687,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode,
buf_size = sizeof(uargs->buf);
- ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
+ ret = search_ioctl(root, &sk, &buf_size, uargs->buf);
/*
* In the origin implementation an overflow is handled by returning a
@@ -1798,7 +1701,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode,
return ret;
}
-static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
+static noinline int btrfs_ioctl_tree_search_v2(struct btrfs_root *root,
void __user *argp)
{
struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
@@ -1820,7 +1723,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
if (buf_size > buf_limit)
buf_size = buf_limit;
- ret = search_ioctl(inode, &args.key, &buf_size,
+ ret = search_ioctl(root, &args.key, &buf_size,
(char __user *)(&uarg->buf[0]));
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
ret = -EFAULT;
@@ -1917,9 +1820,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
struct btrfs_ioctl_ino_lookup_user_args *args)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
- struct super_block *sb = inode->i_sb;
- struct btrfs_key upper_limit = BTRFS_I(inode)->location;
- u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
+ u64 upper_limit = btrfs_ino(BTRFS_I(inode));
+ u64 treeid = btrfs_root_id(BTRFS_I(inode)->root);
u64 dirid = args->dirid;
unsigned long item_off;
unsigned long item_len;
@@ -1929,7 +1831,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
struct btrfs_path *path;
struct btrfs_key key, key2;
struct extent_buffer *leaf;
- struct inode *temp_inode;
char *ptr;
int slot;
int len;
@@ -1944,7 +1845,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
* If the bottom subvolume does not exist directly under upper_limit,
* construct the path in from the bottom up.
*/
- if (dirid != upper_limit.objectid) {
+ if (dirid != upper_limit) {
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
root = btrfs_get_fs_root(fs_info, treeid, true);
@@ -1957,6 +1858,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
while (1) {
+ struct btrfs_inode *temp_inode;
+
ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
goto out_put;
@@ -2006,20 +1909,20 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
* btree and lock the same leaf.
*/
btrfs_release_path(path);
- temp_inode = btrfs_iget(sb, key2.objectid, root);
+ temp_inode = btrfs_iget(key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
goto out_put;
}
- ret = inode_permission(idmap, temp_inode,
+ ret = inode_permission(idmap, &temp_inode->vfs_inode,
MAY_READ | MAY_EXEC);
- iput(temp_inode);
+ iput(&temp_inode->vfs_inode);
if (ret) {
ret = -EACCES;
goto out_put;
}
- if (key.offset == upper_limit.objectid)
+ if (key.offset == upper_limit)
break;
if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
ret = -EACCES;
@@ -2091,7 +1994,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
* path is reset so it's consistent with btrfs_search_path_in_tree.
*/
if (args->treeid == 0)
- args->treeid = root->root_key.objectid;
+ args->treeid = btrfs_root_id(root);
if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
args->name[0] = 0;
@@ -2140,7 +2043,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
inode = file_inode(file);
if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
- BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
/*
* The subvolume does not exist under fd with which this is
* called
@@ -2187,7 +2090,7 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
fs_info = BTRFS_I(inode)->root->fs_info;
/* Get root_item of inode's subvolume */
- key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+ key.objectid = btrfs_root_id(BTRFS_I(inode)->root);
root = btrfs_get_fs_root(fs_info, key.objectid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
@@ -2302,7 +2205,7 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
return PTR_ERR(rootrefs);
}
- objectid = root->root_key.objectid;
+ objectid = btrfs_root_id(root);
key.objectid = objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = rootrefs->min_treeid;
@@ -2385,8 +2288,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
struct mnt_idmap *idmap = file_mnt_idmap(file);
char *subvol_name, *subvol_name_ptr = NULL;
- int subvol_namelen;
- int err = 0;
+ int ret = 0;
bool destroy_parent = false;
/* We don't support snapshots with extent tree v2 yet. */
@@ -2402,7 +2304,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
return PTR_ERR(vol_args2);
if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
- err = -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
goto out;
}
@@ -2411,31 +2313,31 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* name, same as v1 currently does.
*/
if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
- err = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2);
- if (err < 0)
+ ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2);
+ if (ret < 0)
goto out;
subvol_name = vol_args2->name;
- err = mnt_want_write_file(file);
- if (err)
+ ret = mnt_want_write_file(file);
+ if (ret)
goto out;
} else {
struct inode *old_dir;
if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
- err = mnt_want_write_file(file);
- if (err)
+ ret = mnt_want_write_file(file);
+ if (ret)
goto out;
dentry = btrfs_get_dentry(fs_info->sb,
BTRFS_FIRST_FREE_OBJECTID,
vol_args2->subvolid, 0);
if (IS_ERR(dentry)) {
- err = PTR_ERR(dentry);
+ ret = PTR_ERR(dentry);
goto out_drop_write;
}
@@ -2455,7 +2357,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
dput(dentry);
if (IS_ERR(parent)) {
- err = PTR_ERR(parent);
+ ret = PTR_ERR(parent);
goto out_drop_write;
}
old_dir = dir;
@@ -2479,14 +2381,14 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* to delete without an idmapped mount.
*/
if (old_dir != dir && idmap != &nop_mnt_idmap) {
- err = -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
goto free_parent;
}
subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
fs_info, vol_args2->subvolid);
if (IS_ERR(subvol_name_ptr)) {
- err = PTR_ERR(subvol_name_ptr);
+ ret = PTR_ERR(subvol_name_ptr);
goto free_parent;
}
/* subvol_name_ptr is already nul terminated */
@@ -2497,41 +2399,39 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- err = btrfs_check_ioctl_vol_args_path(vol_args);
- if (err < 0)
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
goto out;
subvol_name = vol_args->name;
- err = mnt_want_write_file(file);
- if (err)
+ ret = mnt_want_write_file(file);
+ if (ret)
goto out;
}
- subvol_namelen = strlen(subvol_name);
-
if (strchr(subvol_name, '/') ||
- strncmp(subvol_name, "..", subvol_namelen) == 0) {
- err = -EINVAL;
+ strcmp(subvol_name, "..") == 0) {
+ ret = -EINVAL;
goto free_subvol_name;
}
if (!S_ISDIR(dir->i_mode)) {
- err = -ENOTDIR;
+ ret = -ENOTDIR;
goto free_subvol_name;
}
- err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (err == -EINTR)
+ ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
+ if (ret == -EINTR)
goto free_subvol_name;
- dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen);
+ dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
if (IS_ERR(dentry)) {
- err = PTR_ERR(dentry);
+ ret = PTR_ERR(dentry);
goto out_unlock_dir;
}
if (d_really_is_negative(dentry)) {
- err = -ENOENT;
+ ret = -ENOENT;
goto out_dput;
}
@@ -2551,7 +2451,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* Users who want to delete empty subvols should try
* rmdir(2).
*/
- err = -EPERM;
+ ret = -EPERM;
if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
goto out_dput;
@@ -2562,29 +2462,29 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* of the subvol, not a random directory contained
* within it.
*/
- err = -EINVAL;
+ ret = -EINVAL;
if (root == dest)
goto out_dput;
- err = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
- if (err)
+ ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
+ if (ret)
goto out_dput;
}
/* check if subvolume may be deleted by a user */
- err = btrfs_may_delete(idmap, dir, dentry, 1);
- if (err)
+ ret = btrfs_may_delete(idmap, dir, dentry, 1);
+ if (ret)
goto out_dput;
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
- err = -EINVAL;
+ ret = -EINVAL;
goto out_dput;
}
btrfs_inode_lock(BTRFS_I(inode), 0);
- err = btrfs_delete_subvolume(BTRFS_I(dir), dentry);
+ ret = btrfs_delete_subvolume(BTRFS_I(dir), dentry);
btrfs_inode_unlock(BTRFS_I(inode), 0);
- if (!err)
+ if (!ret)
d_delete_notify(dir, dentry);
out_dput:
@@ -2601,7 +2501,7 @@ out_drop_write:
out:
kfree(vol_args2);
kfree(vol_args);
- return err;
+ return ret;
}
static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
@@ -2640,6 +2540,15 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
+ /*
+ * Don't allow defrag on pre-content watched files, as it could
+ * populate the page cache with 0's via readahead.
+ */
+ if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (argp) {
if (copy_from_user(&range, argp, sizeof(range))) {
ret = -EFAULT;
@@ -2658,7 +2567,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
/* the rest are all set to zero by kzalloc */
range.len = (u64)-1;
}
- ret = btrfs_defrag_file(file_inode(file), &file->f_ra,
+ ret = btrfs_defrag_file(BTRFS_I(file_inode(file)), &file->f_ra,
&range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
@@ -2850,7 +2759,7 @@ out_free:
return ret;
}
-static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
+static long btrfs_ioctl_fs_info(const struct btrfs_fs_info *fs_info,
void __user *arg)
{
struct btrfs_ioctl_fs_info_args *fi_args;
@@ -2904,7 +2813,7 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
return ret;
}
-static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
+static long btrfs_ioctl_dev_info(const struct btrfs_fs_info *fs_info,
void __user *arg)
{
BTRFS_DEV_LOOKUP_ARGS(args);
@@ -2981,7 +2890,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
ret = PTR_ERR(new_root);
goto out;
}
- if (!is_fstree(new_root->root_key.objectid)) {
+ if (!is_fstree(btrfs_root_id(new_root))) {
ret = -ENOENT;
goto out_free;
}
@@ -3012,7 +2921,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
- btrfs_mark_buffer_dirty(trans, path->nodes[0]);
btrfs_release_path(path);
btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
@@ -3758,15 +3666,43 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
goto drop_write;
}
- down_write(&fs_info->subvol_sem);
-
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
+ down_write(&fs_info->subvol_sem);
ret = btrfs_quota_enable(fs_info, sa);
+ up_write(&fs_info->subvol_sem);
break;
case BTRFS_QUOTA_CTL_DISABLE:
+ /*
+ * Lock the cleaner mutex to prevent races with concurrent
+ * relocation, because relocation may be building backrefs for
+ * blocks of the quota root while we are deleting the root. This
+ * is like dropping fs roots of deleted snapshots/subvolumes, we
+ * need the same protection.
+ *
+ * This also prevents races between concurrent tasks trying to
+ * disable quotas, because we will unlock and relock
+ * qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+ *
+ * We take this here because we have the dependency of
+ *
+ * inode_lock -> subvol_sem
+ *
+ * because of rename. With relocation we can prealloc extents,
+ * so that makes the dependency chain
+ *
+ * cleaner_mutex -> inode_lock -> subvol_sem
+ *
+ * so we must take the cleaner_mutex here before we take the
+ * subvol_sem. The deadlock can't actually happen, but this
+ * quiets lockdep.
+ */
+ mutex_lock(&fs_info->cleaner_mutex);
+ down_write(&fs_info->subvol_sem);
ret = btrfs_quota_disable(fs_info);
+ up_write(&fs_info->subvol_sem);
+ mutex_unlock(&fs_info->cleaner_mutex);
break;
default:
ret = -EINVAL;
@@ -3774,18 +3710,34 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
}
kfree(sa);
- up_write(&fs_info->subvol_sem);
drop_write:
mnt_drop_write_file(file);
return ret;
}
+/*
+ * Quick check for ioctl handlers if quotas are enabled. Proper locking must be
+ * done before any operations.
+ */
+static bool qgroup_enabled(struct btrfs_fs_info *fs_info)
+{
+ bool ret = true;
+
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ if (!fs_info->quota_root)
+ ret = false;
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
+ return ret;
+}
+
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_assign_args *sa;
+ struct btrfs_qgroup_list *prealloc = NULL;
struct btrfs_trans_handle *trans;
int ret;
int err;
@@ -3793,6 +3745,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!qgroup_enabled(root->fs_info))
+ return -ENOTCONN;
+
ret = mnt_want_write_file(file);
if (ret)
return ret;
@@ -3803,14 +3758,27 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
goto drop_write;
}
+ if (sa->assign) {
+ prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
+ if (!prealloc) {
+ ret = -ENOMEM;
+ goto drop_write;
+ }
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
+ /*
+ * Prealloc ownership is moved to the relation handler, there it's used
+ * or freed on error.
+ */
if (sa->assign) {
- ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
+ ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc);
+ prealloc = NULL;
} else {
ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
}
@@ -3820,13 +3788,15 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
err = btrfs_run_qgroups(trans);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (err < 0)
- btrfs_handle_fs_error(fs_info, err,
- "failed to update qgroup status and info");
+ btrfs_warn(fs_info,
+ "qgroup status update failed after %s relation, marked as inconsistent",
+ sa->assign ? "adding" : "deleting");
err = btrfs_end_transaction(trans);
if (err && !ret)
ret = err;
out:
+ kfree(prealloc);
kfree(sa);
drop_write:
mnt_drop_write_file(file);
@@ -3845,6 +3815,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!qgroup_enabled(root->fs_info))
+ return -ENOTCONN;
+
ret = mnt_want_write_file(file);
if (ret)
return ret;
@@ -3901,6 +3874,9 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!qgroup_enabled(root->fs_info))
+ return -ENOTCONN;
+
ret = mnt_want_write_file(file);
if (ret)
return ret;
@@ -3920,7 +3896,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
qgroupid = sa->qgroupid;
if (!qgroupid) {
/* take the current subvol as qgroup */
- qgroupid = root->root_key.objectid;
+ qgroupid = btrfs_root_id(root);
}
ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);
@@ -3946,6 +3922,9 @@ static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!qgroup_enabled(fs_info))
+ return -ENOTCONN;
+
ret = mnt_want_write_file(file);
if (ret)
return ret;
@@ -3989,8 +3968,7 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
return 0;
}
-static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
- void __user *arg)
+static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -4051,7 +4029,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
!btrfs_is_empty_uuid(root_item->received_uuid)) {
ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
- root->root_key.objectid);
+ btrfs_root_id(root));
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
@@ -4075,7 +4053,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
ret = btrfs_uuid_tree_add(trans, sa->uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
- root->root_key.objectid);
+ btrfs_root_id(root));
if (ret < 0 && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
@@ -4266,7 +4244,7 @@ static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
return 0;
}
-static int check_feature_bits(struct btrfs_fs_info *fs_info,
+static int check_feature_bits(const struct btrfs_fs_info *fs_info,
enum btrfs_feature_set set,
u64 change_mask, u64 flags, u64 supported_flags,
u64 safe_set, u64 safe_clear)
@@ -4402,7 +4380,7 @@ out_drop_write:
return ret;
}
-static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
+static int _btrfs_ioctl_send(struct btrfs_root *root, void __user *argp, bool compat)
{
struct btrfs_ioctl_send_args *arg;
int ret;
@@ -4433,7 +4411,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
if (IS_ERR(arg))
return PTR_ERR(arg);
}
- ret = btrfs_ioctl_send(inode, arg);
+ ret = btrfs_ioctl_send(root, arg);
kfree(arg);
return ret;
}
@@ -4445,12 +4423,17 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
flags);
size_t copy_end;
+ struct btrfs_inode *inode = BTRFS_I(file_inode(file));
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
+ u64 disk_bytenr, disk_io_size;
+ struct extent_state *cached_state = NULL;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
@@ -4503,7 +4486,32 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos;
- ret = btrfs_encoded_read(&kiocb, &iter, &args);
+ ret = btrfs_encoded_read(&kiocb, &iter, &args, &cached_state,
+ &disk_bytenr, &disk_io_size);
+
+ if (ret == -EIOCBQUEUED) {
+ bool unlocked = false;
+ u64 start, lockend, count;
+
+ start = ALIGN_DOWN(kiocb.ki_pos, fs_info->sectorsize);
+ lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
+
+ if (args.compression)
+ count = disk_io_size;
+ else
+ count = args.len;
+
+ ret = btrfs_encoded_read_regular(&kiocb, &iter, start, lockend,
+ &cached_state, disk_bytenr,
+ disk_io_size, count,
+ args.compression, &unlocked);
+
+ if (!unlocked) {
+ btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ }
+ }
+
if (ret >= 0) {
fsnotify_access(file);
if (copy_to_user(argp + copy_end,
@@ -4600,7 +4608,7 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
goto out_iov;
init_sync_kiocb(&kiocb, file);
- ret = kiocb_set_rw_flags(&kiocb, 0);
+ ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
if (ret)
goto out_iov;
kiocb.ki_pos = pos;
@@ -4621,6 +4629,587 @@ out_acct:
return ret;
}
+/*
+ * Context that's attached to an encoded read io_uring command, in cmd->pdu. It
+ * contains the fields in btrfs_uring_read_extent that are necessary to finish
+ * off and cleanup the I/O in btrfs_uring_read_finished.
+ */
+struct btrfs_uring_priv {
+ struct io_uring_cmd *cmd;
+ struct page **pages;
+ unsigned long nr_pages;
+ struct kiocb iocb;
+ struct iovec *iov;
+ struct iov_iter iter;
+ struct extent_state *cached_state;
+ u64 count;
+ u64 start;
+ u64 lockend;
+ int err;
+ bool compressed;
+};
+
+struct io_btrfs_cmd {
+ struct btrfs_uring_priv *priv;
+};
+
+static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd);
+ struct btrfs_uring_priv *priv = bc->priv;
+ struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ unsigned long index;
+ u64 cur;
+ size_t page_offset;
+ ssize_t ret;
+
+ /* The inode lock has already been acquired in btrfs_uring_read_extent. */
+ btrfs_lockdep_inode_acquire(inode, i_rwsem);
+
+ if (priv->err) {
+ ret = priv->err;
+ goto out;
+ }
+
+ if (priv->compressed) {
+ index = 0;
+ page_offset = 0;
+ } else {
+ index = (priv->iocb.ki_pos - priv->start) >> PAGE_SHIFT;
+ page_offset = offset_in_page(priv->iocb.ki_pos - priv->start);
+ }
+ cur = 0;
+ while (cur < priv->count) {
+ size_t bytes = min_t(size_t, priv->count - cur, PAGE_SIZE - page_offset);
+
+ if (copy_page_to_iter(priv->pages[index], page_offset, bytes,
+ &priv->iter) != bytes) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ index++;
+ cur += bytes;
+ page_offset = 0;
+ }
+ ret = priv->count;
+
+out:
+ btrfs_unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
+ add_rchar(current, ret);
+
+ for (index = 0; index < priv->nr_pages; index++)
+ __free_page(priv->pages[index]);
+
+ kfree(priv->pages);
+ kfree(priv->iov);
+ kfree(priv);
+}
+
+void btrfs_uring_read_extent_endio(void *ctx, int err)
+{
+ struct btrfs_uring_priv *priv = ctx;
+ struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(priv->cmd, struct io_btrfs_cmd);
+
+ priv->err = err;
+ bc->priv = priv;
+
+ io_uring_cmd_complete_in_task(priv->cmd, btrfs_uring_read_finished);
+}
+
+static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
+ u64 start, u64 lockend,
+ struct extent_state *cached_state,
+ u64 disk_bytenr, u64 disk_io_size,
+ size_t count, bool compressed,
+ struct iovec *iov, struct io_uring_cmd *cmd)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct page **pages;
+ struct btrfs_uring_priv *priv = NULL;
+ unsigned long nr_pages;
+ int ret;
+
+ nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
+ pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+ ret = btrfs_alloc_page_array(nr_pages, pages, 0);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_fail;
+ }
+
+ priv = kmalloc(sizeof(*priv), GFP_NOFS);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto out_fail;
+ }
+
+ priv->iocb = *iocb;
+ priv->iov = iov;
+ priv->iter = *iter;
+ priv->count = count;
+ priv->cmd = cmd;
+ priv->cached_state = cached_state;
+ priv->compressed = compressed;
+ priv->nr_pages = nr_pages;
+ priv->pages = pages;
+ priv->start = start;
+ priv->lockend = lockend;
+ priv->err = 0;
+
+ ret = btrfs_encoded_read_regular_fill_pages(inode, disk_bytenr,
+ disk_io_size, pages, priv);
+ if (ret && ret != -EIOCBQUEUED)
+ goto out_fail;
+
+ /*
+ * If we return -EIOCBQUEUED, we're deferring the cleanup to
+ * btrfs_uring_read_finished(), which will handle unlocking the extent
+ * and inode and freeing the allocations.
+ */
+
+ /*
+ * We're returning to userspace with the inode lock held, and that's
+ * okay - it'll get unlocked in a worker thread. Call
+ * btrfs_lockdep_inode_release() to avoid confusing lockdep.
+ */
+ btrfs_lockdep_inode_release(inode, i_rwsem);
+
+ return -EIOCBQUEUED;
+
+out_fail:
+ btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ kfree(priv);
+ return ret;
+}
+
+struct btrfs_uring_encoded_data {
+ struct btrfs_ioctl_encoded_io_args args;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov;
+ struct iov_iter iter;
+};
+
+static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags);
+ size_t copy_end;
+ int ret;
+ u64 disk_bytenr, disk_io_size;
+ struct file *file;
+ struct btrfs_inode *inode;
+ struct btrfs_fs_info *fs_info;
+ struct extent_io_tree *io_tree;
+ loff_t pos;
+ struct kiocb kiocb;
+ struct extent_state *cached_state = NULL;
+ u64 start, lockend;
+ void __user *sqe_addr;
+ struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_acct;
+ }
+ file = cmd->file;
+ inode = BTRFS_I(file->f_inode);
+ fs_info = inode->root->fs_info;
+ io_tree = &inode->io_tree;
+ sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
+
+ if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags);
+#else
+ return -ENOTTY;
+#endif
+ } else {
+ copy_end = copy_end_kernel;
+ }
+
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_NOFS);
+ if (!data) {
+ ret = -ENOMEM;
+ goto out_acct;
+ }
+
+ io_uring_cmd_get_async_data(cmd)->op_data = data;
+
+ if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
+
+ if (copy_from_user(&args32, sqe_addr, copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+
+ data->args.iov = compat_ptr(args32.iov);
+ data->args.iovcnt = args32.iovcnt;
+ data->args.offset = args32.offset;
+ data->args.flags = args32.flags;
+#endif
+ } else {
+ if (copy_from_user(&data->args, sqe_addr, copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+
+ if (data->args.flags != 0) {
+ ret = -EINVAL;
+ goto out_acct;
+ }
+
+ data->iov = data->iovstack;
+ ret = import_iovec(ITER_DEST, data->args.iov, data->args.iovcnt,
+ ARRAY_SIZE(data->iovstack), &data->iov,
+ &data->iter);
+ if (ret < 0)
+ goto out_acct;
+
+ if (iov_iter_count(&data->iter) == 0) {
+ ret = 0;
+ goto out_free;
+ }
+ }
+
+ pos = data->args.offset;
+ ret = rw_verify_area(READ, file, &pos, data->args.len);
+ if (ret < 0)
+ goto out_free;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = pos;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ kiocb.ki_flags |= IOCB_NOWAIT;
+
+ start = ALIGN_DOWN(pos, fs_info->sectorsize);
+ lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
+
+ ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state,
+ &disk_bytenr, &disk_io_size);
+ if (ret == -EAGAIN)
+ goto out_acct;
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ goto out_free;
+
+ file_accessed(file);
+
+ if (copy_to_user(sqe_addr + copy_end,
+ (const char *)&data->args + copy_end_kernel,
+ sizeof(data->args) - copy_end_kernel)) {
+ if (ret == -EIOCBQUEUED) {
+ btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ }
+ ret = -EFAULT;
+ goto out_free;
+ }
+
+ if (ret == -EIOCBQUEUED) {
+ u64 count = min_t(u64, iov_iter_count(&data->iter), disk_io_size);
+
+ /* Match ioctl by not returning past EOF if uncompressed. */
+ if (!data->args.compression)
+ count = min_t(u64, count, data->args.len);
+
+ ret = btrfs_uring_read_extent(&kiocb, &data->iter, start, lockend,
+ cached_state, disk_bytenr, disk_io_size,
+ count, data->args.compression,
+ data->iov, cmd);
+
+ goto out_acct;
+ }
+
+out_free:
+ kfree(data->iov);
+
+out_acct:
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+
+ return ret;
+}
+
+static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ loff_t pos;
+ struct kiocb kiocb;
+ struct file *file;
+ ssize_t ret;
+ void __user *sqe_addr;
+ struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_acct;
+ }
+
+ file = cmd->file;
+ sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
+
+ if (!(file->f_mode & FMODE_WRITE)) {
+ ret = -EBADF;
+ goto out_acct;
+ }
+
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_NOFS);
+ if (!data) {
+ ret = -ENOMEM;
+ goto out_acct;
+ }
+
+ io_uring_cmd_get_async_data(cmd)->op_data = data;
+
+ if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
+
+ if (copy_from_user(&args32, sqe_addr, sizeof(args32))) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ data->args.iov = compat_ptr(args32.iov);
+ data->args.iovcnt = args32.iovcnt;
+ data->args.offset = args32.offset;
+ data->args.flags = args32.flags;
+ data->args.len = args32.len;
+ data->args.unencoded_len = args32.unencoded_len;
+ data->args.unencoded_offset = args32.unencoded_offset;
+ data->args.compression = args32.compression;
+ data->args.encryption = args32.encryption;
+ memcpy(data->args.reserved, args32.reserved,
+ sizeof(data->args.reserved));
+#else
+ ret = -ENOTTY;
+ goto out_acct;
+#endif
+ } else {
+ if (copy_from_user(&data->args, sqe_addr, sizeof(data->args))) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+
+ ret = -EINVAL;
+ if (data->args.flags != 0)
+ goto out_acct;
+ if (memchr_inv(data->args.reserved, 0, sizeof(data->args.reserved)))
+ goto out_acct;
+ if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
+ data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+ goto out_acct;
+ if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
+ data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
+ goto out_acct;
+ if (data->args.unencoded_offset > data->args.unencoded_len)
+ goto out_acct;
+ if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset)
+ goto out_acct;
+
+ data->iov = data->iovstack;
+ ret = import_iovec(ITER_SOURCE, data->args.iov, data->args.iovcnt,
+ ARRAY_SIZE(data->iovstack), &data->iov,
+ &data->iter);
+ if (ret < 0)
+ goto out_acct;
+
+ if (iov_iter_count(&data->iter) == 0) {
+ ret = 0;
+ goto out_iov;
+ }
+ }
+
+ if (issue_flags & IO_URING_F_NONBLOCK) {
+ ret = -EAGAIN;
+ goto out_acct;
+ }
+
+ pos = data->args.offset;
+ ret = rw_verify_area(WRITE, file, &pos, data->args.len);
+ if (ret < 0)
+ goto out_iov;
+
+ init_sync_kiocb(&kiocb, file);
+ ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
+ if (ret)
+ goto out_iov;
+ kiocb.ki_pos = pos;
+
+ file_start_write(file);
+
+ ret = btrfs_do_write_iter(&kiocb, &data->iter, &data->args);
+ if (ret > 0)
+ fsnotify_modify(file);
+
+ file_end_write(file);
+out_iov:
+ kfree(data->iov);
+out_acct:
+ if (ret > 0)
+ add_wchar(current, ret);
+ inc_syscw(current);
+ return ret;
+}
+
+int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ switch (cmd->cmd_op) {
+ case BTRFS_IOC_ENCODED_READ:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ case BTRFS_IOC_ENCODED_READ_32:
+#endif
+ return btrfs_uring_encoded_read(cmd, issue_flags);
+
+ case BTRFS_IOC_ENCODED_WRITE:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ case BTRFS_IOC_ENCODED_WRITE_32:
+#endif
+ return btrfs_uring_encoded_write(cmd, issue_flags);
+ }
+
+ return -EINVAL;
+}
+
+static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *argp)
+{
+ struct btrfs_root *root;
+ struct btrfs_ioctl_subvol_wait args = { 0 };
+ signed long sched_ret;
+ int refs;
+ u64 root_flags;
+ bool wait_for_deletion = false;
+ bool found = false;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+
+ switch (args.mode) {
+ case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED:
+ /*
+ * Wait for the first one deleted that waits until all previous
+ * are cleaned.
+ */
+ spin_lock(&fs_info->trans_lock);
+ if (!list_empty(&fs_info->dead_roots)) {
+ root = list_last_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
+ args.subvolid = btrfs_root_id(root);
+ found = true;
+ }
+ spin_unlock(&fs_info->trans_lock);
+ if (!found)
+ return -ENOENT;
+
+ fallthrough;
+ case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE:
+ if ((0 < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) ||
+ BTRFS_LAST_FREE_OBJECTID < args.subvolid)
+ return -EINVAL;
+ break;
+ case BTRFS_SUBVOL_SYNC_COUNT:
+ spin_lock(&fs_info->trans_lock);
+ args.count = list_count_nodes(&fs_info->dead_roots);
+ spin_unlock(&fs_info->trans_lock);
+ if (copy_to_user(argp, &args, sizeof(args)))
+ return -EFAULT;
+ return 0;
+ case BTRFS_SUBVOL_SYNC_PEEK_FIRST:
+ spin_lock(&fs_info->trans_lock);
+ /* Last in the list was deleted first. */
+ if (!list_empty(&fs_info->dead_roots)) {
+ root = list_last_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
+ args.subvolid = btrfs_root_id(root);
+ } else {
+ args.subvolid = 0;
+ }
+ spin_unlock(&fs_info->trans_lock);
+ if (copy_to_user(argp, &args, sizeof(args)))
+ return -EFAULT;
+ return 0;
+ case BTRFS_SUBVOL_SYNC_PEEK_LAST:
+ spin_lock(&fs_info->trans_lock);
+ /* First in the list was deleted last. */
+ if (!list_empty(&fs_info->dead_roots)) {
+ root = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
+ args.subvolid = btrfs_root_id(root);
+ } else {
+ args.subvolid = 0;
+ }
+ spin_unlock(&fs_info->trans_lock);
+ if (copy_to_user(argp, &args, sizeof(args)))
+ return -EFAULT;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ /* 32bit limitation: fs_roots_radix key is not wide enough. */
+ if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX)
+ return -EOVERFLOW;
+
+ while (1) {
+ /* Wait for the specific one. */
+ if (down_read_interruptible(&fs_info->subvol_sem) == -EINTR)
+ return -EINTR;
+ refs = -1;
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)args.subvolid);
+ if (root) {
+ spin_lock(&root->root_item_lock);
+ refs = btrfs_root_refs(&root->root_item);
+ root_flags = btrfs_root_flags(&root->root_item);
+ spin_unlock(&root->root_item_lock);
+ }
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ up_read(&fs_info->subvol_sem);
+
+ /* Subvolume does not exist. */
+ if (!root)
+ return -ENOENT;
+
+ /* Subvolume not deleted at all. */
+ if (refs > 0)
+ return -EEXIST;
+ /* We've waited and now the subvolume is gone. */
+ if (wait_for_deletion && refs == -1) {
+ /* Return the one we waited for as the last one. */
+ if (copy_to_user(argp, &args, sizeof(args)))
+ return -EFAULT;
+ return 0;
+ }
+
+ /* Subvolume not found on the first try (deleted or never existed). */
+ if (refs == -1)
+ return -ENOENT;
+
+ wait_for_deletion = true;
+ ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD);
+ sched_ret = schedule_timeout_interruptible(HZ);
+ /* Early wake up or error. */
+ if (sched_ret != 0)
+ return -EINTR;
+ }
+
+ return 0;
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -4651,7 +5240,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SNAP_DESTROY_V2:
return btrfs_ioctl_snap_destroy(file, argp, true);
case BTRFS_IOC_SUBVOL_GETFLAGS:
- return btrfs_ioctl_subvol_getflags(inode, argp);
+ return btrfs_ioctl_subvol_getflags(BTRFS_I(inode), argp);
case BTRFS_IOC_SUBVOL_SETFLAGS:
return btrfs_ioctl_subvol_setflags(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -4673,9 +5262,9 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(fs_info, argp);
case BTRFS_IOC_TREE_SEARCH:
- return btrfs_ioctl_tree_search(inode, argp);
+ return btrfs_ioctl_tree_search(root, argp);
case BTRFS_IOC_TREE_SEARCH_V2:
- return btrfs_ioctl_tree_search_v2(inode, argp);
+ return btrfs_ioctl_tree_search_v2(root, argp);
case BTRFS_IOC_INO_LOOKUP:
return btrfs_ioctl_ino_lookup(root, argp);
case BTRFS_IOC_INO_PATHS:
@@ -4694,11 +5283,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);
/*
- * The transaction thread may want to do more work,
- * namely it pokes the cleaner kthread that will start
- * processing uncleaned subvols.
+ * There may be work for the cleaner kthread to do (subvolume
+ * deletion, delayed iputs, defrag inodes, etc), so wake it up.
*/
- wake_up_process(fs_info->transaction_kthread);
+ wake_up_process(fs_info->cleaner_kthread);
return ret;
}
case BTRFS_IOC_START_SYNC:
@@ -4724,10 +5312,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
case BTRFS_IOC_SEND:
- return _btrfs_ioctl_send(inode, argp, false);
+ return _btrfs_ioctl_send(root, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32:
- return _btrfs_ioctl_send(inode, argp, true);
+ return _btrfs_ioctl_send(root, argp, true);
#endif
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp);
@@ -4744,7 +5332,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_QUOTA_RESCAN_STATUS:
return btrfs_ioctl_quota_rescan_status(fs_info, argp);
case BTRFS_IOC_QUOTA_RESCAN_WAIT:
- return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
+ return btrfs_ioctl_quota_rescan_wait(fs_info);
case BTRFS_IOC_DEV_REPLACE:
return btrfs_ioctl_dev_replace(fs_info, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
@@ -4763,6 +5351,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return fsverity_ioctl_enable(file, (const void __user *)argp);
case FS_IOC_MEASURE_VERITY:
return fsverity_ioctl_measure(file, argp);
+ case FS_IOC_READ_VERITY_METADATA:
+ return fsverity_ioctl_read_metadata(file, argp);
case BTRFS_IOC_ENCODED_READ:
return btrfs_ioctl_encoded_read(file, argp, false);
case BTRFS_IOC_ENCODED_WRITE:
@@ -4773,6 +5363,8 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_ENCODED_WRITE_32:
return btrfs_ioctl_encoded_write(file, argp, true);
#endif
+ case BTRFS_IOC_SUBVOL_SYNC_WAIT:
+ return btrfs_ioctl_subvol_sync(fs_info, argp);
}
return -ENOTTY;