summaryrefslogtreecommitdiff
path: root/fs/btrfs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r--fs/btrfs/super.c348
1 files changed, 187 insertions, 161 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3a4dce153645..6e71a2a78363 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,12 +61,21 @@
#include "tests/btrfs-tests.h"
#include "qgroup.h"
-#include "backref.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
static const struct super_operations btrfs_super_ops;
+
+/*
+ * Types for mounting the default subvolume and a subvolume explicitly
+ * requested by subvol=/path. That way the callchain is straightforward and we
+ * don't have to play tricks with the mount options and recursive calls to
+ * btrfs_mount.
+ *
+ * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
+ */
static struct file_system_type btrfs_fs_type;
+static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
@@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno)
return errstr;
}
-/* btrfs handle error by forcing the filesystem readonly */
-static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
-{
- struct super_block *sb = fs_info->sb;
-
- if (sb_rdonly(sb))
- return;
-
- if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
- sb->s_flags |= SB_RDONLY;
- btrfs_info(fs_info, "forced readonly");
- /*
- * Note that a running device replace operation is not
- * canceled here although there is no way to update
- * the progress. It would add the risk of a deadlock,
- * therefore the canceling is omitted. The only penalty
- * is that some I/O remains active until the procedure
- * completes. The next time when the filesystem is
- * mounted writeable again, the device replace
- * operation continues.
- */
- }
-}
-
/*
* __btrfs_handle_fs_error decodes expected errors from the caller and
* invokes the approciate error response.
@@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
/* Don't go through full error handling during mount */
- if (sb->s_flags & SB_BORN)
- btrfs_handle_error(fs_info);
+ if (!(sb->s_flags & SB_BORN))
+ return;
+
+ if (sb_rdonly(sb))
+ return;
+
+ /* btrfs handles errors by forcing the filesystem read-only */
+ sb->s_flags |= SB_RDONLY;
+ btrfs_info(fs_info, "forced readonly");
+ /*
+ * Note that a running device replace operation is not canceled here
+ * although there is no way to update the progress. It would add the
+ * risk of a deadlock, therefore the canceling is omitted. The only
+ * penalty is that some I/O remains active until the procedure
+ * completes. The next time when the filesystem is mounted writeable
+ * again, the device replace operation continues.
+ */
}
#ifdef CONFIG_PRINTK
@@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags)
{
substring_t args[MAX_OPT_ARGS];
- char *p, *num, *orig = NULL;
+ char *p, *num;
u64 cache_gen;
int intarg;
int ret = 0;
@@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
if (!options)
goto check;
- /*
- * strsep changes the string, duplicate it because parse_options
- * gets called twice
- */
- options = kstrdup(options, GFP_KERNEL);
- if (!options)
- return -ENOMEM;
-
- orig = options;
-
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
@@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvolrootid:
case Opt_device:
/*
- * These are parsed by btrfs_parse_early_options
+ * These are parsed by btrfs_parse_subvol_options
+ * and btrfs_parse_early_options
* and can be happily ignored here.
*/
break;
@@ -877,7 +868,6 @@ out:
btrfs_info(info, "disk space caching is enabled");
if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(info, "using free space tree");
- kfree(orig);
return ret;
}
@@ -888,11 +878,60 @@ out:
* only when we need to allocate a new super block.
*/
static int btrfs_parse_early_options(const char *options, fmode_t flags,
- void *holder, char **subvol_name, u64 *subvol_objectid,
- struct btrfs_fs_devices **fs_devices)
+ void *holder, struct btrfs_fs_devices **fs_devices)
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
+ int error = 0;
+
+ if (!options)
+ return 0;
+
+ /*
+ * strsep changes the string, duplicate it because btrfs_parse_options
+ * gets called later
+ */
+ opts = kstrdup(options, GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+ orig = opts;
+
+ while ((p = strsep(&opts, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ if (token == Opt_device) {
+ device_name = match_strdup(&args[0]);
+ if (!device_name) {
+ error = -ENOMEM;
+ goto out;
+ }
+ error = btrfs_scan_one_device(device_name,
+ flags, holder, fs_devices);
+ kfree(device_name);
+ if (error)
+ goto out;
+ }
+ }
+
+out:
+ kfree(orig);
+ return error;
+}
+
+/*
+ * Parse mount options that are related to subvolume id
+ *
+ * The value is later passed to mount_subvol()
+ */
+static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
+ char **subvol_name, u64 *subvol_objectid)
+{
+ substring_t args[MAX_OPT_ARGS];
+ char *opts, *orig, *p;
char *num = NULL;
int error = 0;
@@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
return 0;
/*
- * strsep changes the string, duplicate it because parse_options
- * gets called twice
+ * strsep changes the string, duplicate it because
+ * btrfs_parse_early_options gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
@@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
break;
- case Opt_device:
- device_name = match_strdup(&args[0]);
- if (!device_name) {
- error = -ENOMEM;
- goto out;
- }
- error = btrfs_scan_one_device(device_name,
- flags, holder, fs_devices);
- kfree(device_name);
- if (error)
- goto out;
- break;
default:
break;
}
@@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
- char *compress_type;
+ const char *compress_type;
if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded");
@@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
if (btrfs_test_opt(info, COMPRESS)) {
- if (info->compress_type == BTRFS_COMPRESS_ZLIB)
- compress_type = "zlib";
- else if (info->compress_type == BTRFS_COMPRESS_LZO)
- compress_type = "lzo";
- else
- compress_type = "zstd";
+ compress_type = btrfs_compress_type2str(info->compress_type);
if (btrfs_test_opt(info, FORCE_COMPRESS))
seq_printf(seq, ",compress-force=%s", compress_type);
else
@@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode)
return 0;
}
-/*
- * This will add subvolid=0 to the argument string while removing any subvol=
- * and subvolid= arguments to make sure we get the top-level root for path
- * walking to the subvol we want.
- */
-static char *setup_root_args(char *args)
-{
- char *buf, *dst, *sep;
-
- if (!args)
- return kstrdup("subvolid=0", GFP_KERNEL);
-
- /* The worst case is that we add ",subvolid=0" to the end. */
- buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
- GFP_KERNEL);
- if (!buf)
- return NULL;
-
- while (1) {
- sep = strchrnul(args, ',');
- if (!strstarts(args, "subvol=") &&
- !strstarts(args, "subvolid=")) {
- memcpy(dst, args, sep - args);
- dst += sep - args;
- *dst++ = ',';
- }
- if (*sep)
- args = sep + 1;
- else
- break;
- }
- strcpy(dst, "subvolid=0");
-
- return buf;
-}
-
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
- int flags, const char *device_name,
- char *data)
+ const char *device_name, struct vfsmount *mnt)
{
struct dentry *root;
- struct vfsmount *mnt = NULL;
- char *newargs;
int ret;
- newargs = setup_root_args(data);
- if (!newargs) {
- root = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
- if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
- if (flags & SB_RDONLY) {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
- device_name, newargs);
- } else {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
- device_name, newargs);
- if (IS_ERR(mnt)) {
- root = ERR_CAST(mnt);
- mnt = NULL;
- goto out;
- }
-
- down_write(&mnt->mnt_sb->s_umount);
- ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
- up_write(&mnt->mnt_sb->s_umount);
- if (ret < 0) {
- root = ERR_PTR(ret);
- goto out;
- }
- }
- }
- if (IS_ERR(mnt)) {
- root = ERR_CAST(mnt);
- mnt = NULL;
- goto out;
- }
-
if (!subvol_name) {
if (!subvol_objectid) {
ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
@@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
out:
mntput(mnt);
- kfree(newargs);
kfree(subvol_name);
return root;
}
@@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
/*
* Find a superblock for the given device / mount point.
*
- * Note: This is based on get_sb_bdev from fs/super.c with a few additions
- * for multiple device setup. Make sure to keep it in sync.
+ * Note: This is based on mount_bdev from fs/super.c with a few additions
+ * for multiple device setup. Make sure to keep it in sync.
*/
-static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
- const char *device_name, void *data)
+static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
+ int flags, const char *device_name, void *data)
{
struct block_device *bdev = NULL;
struct super_block *s;
@@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
fmode_t mode = FMODE_READ;
- char *subvol_name = NULL;
- u64 subvol_objectid = 0;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
error = btrfs_parse_early_options(data, mode, fs_type,
- &subvol_name, &subvol_objectid,
&fs_devices);
if (error) {
- kfree(subvol_name);
return ERR_PTR(error);
}
- if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
- /* mount_subvol() will free subvol_name. */
- return mount_subvol(subvol_name, subvol_objectid, flags,
- device_name, data);
- }
-
security_init_mnt_opts(&new_sec_opts);
if (data) {
error = parse_security_options(data, &new_sec_opts);
@@ -1674,6 +1611,84 @@ error_sec_opts:
return ERR_PTR(error);
}
+/*
+ * Mount function which is called by VFS layer.
+ *
+ * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
+ * which needs vfsmount* of device's root (/). This means device's root has to
+ * be mounted internally in any case.
+ *
+ * Operation flow:
+ *   1. Parse subvol id related options for later use in mount_subvol().
+ *
+ *   2. Mount device's root (/) by calling vfs_kern_mount().
+ *
+ *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
+ *      first place. In order to avoid calling btrfs_mount() again, we use a
+ *      different file_system_type which is not registered to VFS by
+ *      register_filesystem() (btrfs_root_fs_type). As a result,
+ *      btrfs_mount_root() is called. The return value will be used by
+ *      mount_subtree() in mount_subvol().
+ *
+ *      If that mount fails with -EBUSY, it is retried with SB_RDONLY
+ *      flipped (presumably the superblock is already mounted with the
+ *      opposite ro/rw state - confirm against btrfs_mount_root()). When the
+ *      caller asked for a writeable mount, the read-only retry is followed
+ *      by btrfs_remount() with the original flags, under s_umount.
+ *
+ *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
+ *      "btrfs subvolume set-default", mount_subvol() is called always.
+ *
+ * Ownership: once mount_subvol() is called it is responsible for releasing
+ * both subvol_name and mnt_root. On every failure before that point this
+ * function must free subvol_name itself, otherwise the string handed back by
+ * btrfs_parse_subvol_options() is leaked.
+ *
+ * Returns the dentry from mount_subvol() or an ERR_PTR() on failure.
+ */
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+		const char *device_name, void *data)
+{
+	struct vfsmount *mnt_root;
+	struct dentry *root;
+	fmode_t mode = FMODE_READ;
+	char *subvol_name = NULL;
+	u64 subvol_objectid = 0;
+	int error = 0;
+
+	if (!(flags & SB_RDONLY))
+		mode |= FMODE_WRITE;
+
+	error = btrfs_parse_subvol_options(data, mode,
+					  &subvol_name, &subvol_objectid);
+	if (error) {
+		root = ERR_PTR(error);
+		goto out_free;
+	}
+
+	/* mount device's root (/) */
+	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
+	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
+		if (flags & SB_RDONLY) {
+			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
+				flags & ~SB_RDONLY, device_name, data);
+		} else {
+			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
+				flags | SB_RDONLY, device_name, data);
+			if (IS_ERR(mnt_root)) {
+				root = ERR_CAST(mnt_root);
+				goto out_free;
+			}
+
+			/* Got a read-only mount; switch it to the requested flags */
+			down_write(&mnt_root->mnt_sb->s_umount);
+			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
+			up_write(&mnt_root->mnt_sb->s_umount);
+			if (error < 0) {
+				root = ERR_PTR(error);
+				mntput(mnt_root);
+				goto out_free;
+			}
+		}
+	}
+	if (IS_ERR(mnt_root)) {
+		root = ERR_CAST(mnt_root);
+		goto out_free;
+	}
+
+	/* mount_subvol() will free subvol_name and mnt_root */
+	return mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
+
+out_free:
+	/* mount_subvol() was never reached, so subvol_name is still ours */
+	kfree(subvol_name);
+	return root;
+}
+
+
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
int new_pool_size, int old_pool_size)
{
@@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
- if (!btrfs_check_rw_degradable(fs_info)) {
+ if (!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"too many missing devices, writeable remount is not allowed");
ret = -EACCES;
@@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
- if (!device->in_fs_metadata || !device->bdev ||
- device->is_tgtdev_for_dev_replace)
+ if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+ &device->dev_state) ||
+ !device->bdev ||
+ test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
continue;
if (i >= nr_devices)
@@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = {
.kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
};
+
+static struct file_system_type btrfs_root_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "btrfs",
+ .mount = btrfs_mount_root,
+ .kill_sb = btrfs_kill_super,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+};
+
MODULE_ALIAS_FS("btrfs");
static int btrfs_control_open(struct inode *inode, struct file *file)
@@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_fs_type, &fs_devices);
+ &btrfs_root_fs_type, &fs_devices);
break;
case BTRFS_IOC_DEVICES_READY:
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_fs_type, &fs_devices);
+ &btrfs_root_fs_type, &fs_devices);
if (ret)
break;
ret = !(fs_devices->num_devices == fs_devices->total_devices);
@@ -2269,7 +2295,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
while (cur_devices) {
head = &cur_devices->devices;
list_for_each_entry(dev, head, dev_list) {
- if (dev->missing)
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->name)
continue;
@@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = {
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");
-static int btrfs_interface_init(void)
+static int __init btrfs_interface_init(void)
{
return misc_register(&btrfs_misc);
}
@@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void)
misc_deregister(&btrfs_misc);
}
-static void btrfs_print_mod_info(void)
+static void __init btrfs_print_mod_info(void)
{
pr_info("Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG