summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/block-group.c40
-rw-r--r--fs/btrfs/block-group.h4
-rw-r--r--fs/btrfs/btrfs_inode.h11
-rw-r--r--fs/btrfs/compression.c8
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/dev-replace.c7
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c44
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/inode.c65
-rw-r--r--fs/btrfs/ioctl.c22
-rw-r--r--fs/btrfs/props.c59
-rw-r--r--fs/btrfs/props.h4
-rw-r--r--fs/btrfs/scrub.c26
-rw-r--r--fs/btrfs/sysfs.c3
-rw-r--r--fs/btrfs/tree-log.c54
-rw-r--r--fs/btrfs/volumes.c67
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/btrfs/xattr.c11
-rw-r--r--fs/btrfs/zoned.c47
-rw-r--r--fs/btrfs/zoned.h4
23 files changed, 350 insertions, 164 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c22d287e020b..0dd6de994199 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2503,12 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
return ERR_PTR(ret);
}
- /*
- * New block group is likely to be used soon. Try to activate it now.
- * Failure is OK for now.
- */
- btrfs_zone_activate(cache);
-
ret = exclude_super_stripes(cache);
if (ret) {
/* We may have excluded something, so call this just in case */
@@ -2946,7 +2940,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
struct btrfs_path *path = NULL;
LIST_HEAD(dirty);
struct list_head *io = &cur_trans->io_bgs;
- int num_started = 0;
int loops = 0;
spin_lock(&cur_trans->dirty_bgs_lock);
@@ -3012,7 +3005,6 @@ again:
cache->io_ctl.inode = NULL;
ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
- num_started++;
should_put = 0;
/*
@@ -3113,7 +3105,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
int should_put;
struct btrfs_path *path;
struct list_head *io = &cur_trans->io_bgs;
- int num_started = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3171,7 +3162,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
cache->io_ctl.inode = NULL;
ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
- num_started++;
should_put = 0;
list_add_tail(&cache->io_list, io);
} else {
@@ -3455,7 +3445,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
{
struct btrfs_block_group *bg;
int ret;
@@ -3542,7 +3532,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
out:
btrfs_trans_release_chunk_metadata(trans);
- return ret;
+ if (ret)
+ return ERR_PTR(ret);
+
+ btrfs_get_block_group(bg);
+ return bg;
}
/*
@@ -3657,10 +3651,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
+ struct btrfs_block_group *ret_bg;
bool wait_for_alloc = false;
bool should_alloc = false;
+ bool from_extent_allocation = false;
int ret = 0;
+ if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) {
+ from_extent_allocation = true;
+ force = CHUNK_ALLOC_FORCE;
+ }
+
/* Don't re-enter if we're already allocating a chunk */
if (trans->allocating_chunk)
return -ENOSPC;
@@ -3750,9 +3751,22 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
force_metadata_allocation(fs_info);
}
- ret = do_chunk_alloc(trans, flags);
+ ret_bg = do_chunk_alloc(trans, flags);
trans->allocating_chunk = false;
+ if (IS_ERR(ret_bg)) {
+ ret = PTR_ERR(ret_bg);
+ } else if (from_extent_allocation) {
+ /*
+ * New block group is likely to be used soon. Try to activate
+ * it now. Failure is OK for now.
+ */
+ btrfs_zone_activate(ret_bg);
+ }
+
+ if (!ret)
+ btrfs_put_block_group(ret_bg);
+
spin_lock(&space_info->lock);
if (ret < 0) {
if (ret == -ENOSPC)
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 93aabc68bb6a..e8308f2ad07d 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -35,11 +35,15 @@ enum btrfs_discard_state {
* the FS with empty chunks
*
* CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from
+ * find_free_extent() that also activaes the zone
*/
enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_NO_FORCE,
CHUNK_ALLOC_LIMITED,
CHUNK_ALLOC_FORCE,
+ CHUNK_ALLOC_FORCE_FOR_EXTENT,
};
struct btrfs_caching_control {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 47e72d72f7d0..32131a5d321b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -384,6 +384,17 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
return ret;
}
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
+{
+ if (inode->flags & BTRFS_INODE_NODATACOW ||
+ inode->flags & BTRFS_INODE_NODATASUM)
+ return false;
+ return true;
+}
+
struct btrfs_dio_private {
struct inode *inode;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index be476f094300..19bf36d8ffea 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -537,6 +537,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
+ if (blkcg_css)
+ kthread_associate_blkcg(blkcg_css);
+
while (cur_disk_bytenr < disk_start + compressed_len) {
u64 offset = cur_disk_bytenr - disk_start;
unsigned int index = offset >> PAGE_SHIFT;
@@ -555,6 +558,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
bio = NULL;
goto finish_cb;
}
+ if (blkcg_css)
+ bio->bi_opf |= REQ_CGROUP_PUNT;
}
/*
* We should never reach next_stripe_start start as we will
@@ -612,6 +617,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
return 0;
finish_cb:
+ if (blkcg_css)
+ kthread_associate_blkcg(NULL);
+
if (bio) {
bio->bi_status = ret;
bio_endio(bio);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b7631b88426e..077c95e9baa5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1060,6 +1060,7 @@ struct btrfs_fs_info {
*/
spinlock_t relocation_bg_lock;
u64 data_reloc_bg;
+ struct mutex zoned_data_reloc_io_lock;
u64 nr_global_roots;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 71fd99b48283..f26202621989 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -734,7 +734,12 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
- /* Commit dev_replace state and reserve 1 item for it. */
+ /*
+ * Commit dev_replace state and reserve 1 item for it.
+ * This is crucial to ensure we won't miss copying extents for new block
+ * groups that are allocated after we started the device replace, and
+ * must be done after setting up the device replace state.
+ */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b30309f187cf..31c3f592e587 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1850,9 +1850,10 @@ again:
ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
- btrfs_put_root(root);
- if (ret == -EEXIST)
+ if (ret == -EEXIST) {
+ btrfs_put_root(root);
goto again;
+ }
goto fail;
}
return root;
@@ -3156,6 +3157,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
mutex_init(&fs_info->zoned_meta_io_lock);
+ mutex_init(&fs_info->zoned_data_reloc_io_lock);
seqlock_init(&fs_info->profiles_lock);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -3656,6 +3658,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
+ /*
+ * V1 space cache has some hardcoded PAGE_SIZE usage, and is
+ * going to be deprecated.
+ *
+ * Force to use v2 cache for subpage case.
+ */
+ btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
+ btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
+ "forcing free space tree for sector size %u with page size %lu",
+ sectorsize, PAGE_SIZE);
+
btrfs_warn(fs_info,
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f477035a2ac2..6aa92f84f465 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4082,7 +4082,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
}
ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
- CHUNK_ALLOC_FORCE);
+ CHUNK_ALLOC_FORCE_FOR_EXTENT);
/* Do not bail out on ENOSPC since we can do more. */
if (ret == -ENOSPC)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 724e8fe06aa0..33c19f51d79b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2658,6 +2658,7 @@ int btrfs_repair_one_sector(struct inode *inode,
repair_bio = btrfs_bio_alloc(1);
repair_bbio = btrfs_bio(repair_bio);
+ repair_bbio->file_offset = start;
repair_bio->bi_opf = REQ_OP_READ;
repair_bio->bi_end_io = failed_bio->bi_end_io;
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
@@ -3333,24 +3334,37 @@ static int alloc_new_bio(struct btrfs_inode *inode,
ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
if (ret < 0)
goto error;
- if (wbc) {
- struct block_device *bdev;
- bdev = fs_info->fs_devices->latest_dev->bdev;
- bio_set_dev(bio, bdev);
- wbc_init_bio(wbc, bio);
- }
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- struct btrfs_device *device;
+ if (wbc) {
+ /*
+ * For Zone append we need the correct block_device that we are
+ * going to write to set in the bio to be able to respect the
+ * hardware limitation. Look it up here:
+ */
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ struct btrfs_device *dev;
+
+ dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
+ fs_info->sectorsize);
+ if (IS_ERR(dev)) {
+ ret = PTR_ERR(dev);
+ goto error;
+ }
- device = btrfs_zoned_get_device(fs_info, disk_bytenr,
- fs_info->sectorsize);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- goto error;
+ bio_set_dev(bio, dev->bdev);
+ } else {
+ /*
+ * Otherwise pick the last added device to support
+ * cgroup writeback. For multi-device file systems this
+ * means blk-cgroup policies have to always be set on the
+ * last added/replaced device. This is a bit odd but has
+ * been like that for a long time.
+ */
+ bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
}
-
- btrfs_bio(bio)->device = device;
+ wbc_init_bio(wbc, bio);
+ } else {
+ ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
}
return 0;
error:
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0399cf8e3c32..151e9da5da2d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl {
*/
struct extent_changeset {
/* How many bytes are set/cleared in this operation */
- unsigned int bytes_changed;
+ u64 bytes_changed;
/* Changed ranges */
struct ulist range_changed;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9f455c96c974..380054c94e4b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2957,8 +2957,9 @@ out:
return ret;
}
-static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
{
+ struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL;
@@ -2990,6 +2991,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
goto out_only_mutex;
}
+ ret = file_modified(file);
+ if (ret)
+ goto out_only_mutex;
+
lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
lockend = round_down(offset + len,
btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
@@ -3430,7 +3435,7 @@ static long btrfs_fallocate(struct file *file, int mode,
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE)
- return btrfs_punch_hole(inode, offset, len);
+ return btrfs_punch_hole(file, offset, len);
/*
* Only trigger disk allocation, don't trigger qgroup reserve
@@ -3452,6 +3457,10 @@ static long btrfs_fallocate(struct file *file, int mode,
goto out;
}
+ ret = file_modified(file);
+ if (ret)
+ goto out;
+
/*
* TODO: Move these two operations after we have checked
* accurate reserved space, or fallocate can still fail but
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6bfc4343c98d..95c499b8424e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -481,17 +481,6 @@ static noinline int add_async_extent(struct async_chunk *cow,
}
/*
- * Check if the inode has flags compatible with compression
- */
-static inline bool inode_can_compress(struct btrfs_inode *inode)
-{
- if (inode->flags & BTRFS_INODE_NODATACOW ||
- inode->flags & BTRFS_INODE_NODATASUM)
- return false;
- return true;
-}
-
-/*
* Check if the inode needs to be submitted to compression, based on mount
* options, defragmentation, properties or heuristics.
*/
@@ -500,7 +489,7 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- if (!inode_can_compress(inode)) {
+ if (!btrfs_inode_can_compress(inode)) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
btrfs_ino(inode));
@@ -1128,7 +1117,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
int ret = 0;
if (btrfs_is_free_space_inode(inode)) {
- WARN_ON_ONCE(1);
ret = -EINVAL;
goto out_unlock;
}
@@ -2017,11 +2005,10 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
* to use run_delalloc_nocow() here, like for regular
* preallocated inodes.
*/
- ASSERT(!zoned ||
- (zoned && btrfs_is_data_reloc_root(inode->root)));
+ ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
- } else if (!inode_can_compress(inode) ||
+ } else if (!btrfs_inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
if (zoned)
ret = run_delalloc_zoned(inode, locked_page, start, end,
@@ -4488,6 +4475,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
return -EPERM;
}
+ if (atomic_read(&dest->nr_swapfiles)) {
+ spin_unlock(&dest->root_item_lock);
+ btrfs_warn(fs_info,
+ "attempt to delete subvolume %llu with active swapfile",
+ root->root_key.objectid);
+ return -EPERM;
+ }
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
@@ -7438,6 +7432,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
u64 block_start, orig_start, orig_block_len, ram_bytes;
bool can_nocow = false;
bool space_reserved = false;
+ u64 prev_len;
int ret = 0;
/*
@@ -7465,6 +7460,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
can_nocow = true;
}
+ prev_len = len;
if (can_nocow) {
struct extent_map *em2;
@@ -7494,8 +7490,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
goto out;
}
} else {
- const u64 prev_len = len;
-
/* Our caller expects us to free the input extent map. */
free_extent_map(em);
*map = NULL;
@@ -7526,7 +7520,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
* We have created our ordered extent, so we can now release our reservation
* for an outstanding extent.
*/
- btrfs_delalloc_release_extents(BTRFS_I(inode), len);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len);
/*
* Need to update the i_size under the extent lock so buffered
@@ -7805,8 +7799,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
struct bio_vec bvec;
struct bvec_iter iter;
- const u64 orig_file_offset = dip->file_offset;
- u64 start = orig_file_offset;
u32 bio_offset = 0;
blk_status_t err = BLK_STS_OK;
@@ -7816,6 +7808,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
pgoff = bvec.bv_offset;
for (i = 0; i < nr_sectors; i++) {
+ u64 start = bbio->file_offset + bio_offset;
+
ASSERT(pgoff < PAGE_SIZE);
if (uptodate &&
(!csum || !check_data_csum(inode, bbio,
@@ -7828,17 +7822,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
} else {
int ret;
- ASSERT((start - orig_file_offset) < UINT_MAX);
- ret = btrfs_repair_one_sector(inode,
- &bbio->bio,
- start - orig_file_offset,
- bvec.bv_page, pgoff,
+ ret = btrfs_repair_one_sector(inode, &bbio->bio,
+ bio_offset, bvec.bv_page, pgoff,
start, bbio->mirror_num,
submit_dio_repair_bio);
if (ret)
err = errno_to_blk_status(ret);
}
- start += sectorsize;
ASSERT(bio_offset + sectorsize > bio_offset);
bio_offset += sectorsize;
pgoff += sectorsize;
@@ -7865,6 +7855,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
+ struct btrfs_bio *bbio = btrfs_bio(bio);
blk_status_t err = bio->bi_status;
if (err)
@@ -7875,12 +7866,12 @@ static void btrfs_end_dio_bio(struct bio *bio)
bio->bi_iter.bi_size, err);
if (bio_op(bio) == REQ_OP_READ)
- err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err);
+ err = btrfs_check_read_dio_bio(dip, bbio, !err);
if (err)
dip->dio_bio->bi_status = err;
- btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
+ btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
bio_put(bio);
btrfs_dio_private_put(dip);
@@ -8041,6 +8032,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
+ btrfs_bio(bio)->file_offset = file_offset;
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
status = extract_ordered_extent(BTRFS_I(inode), bio,
@@ -11107,8 +11099,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
* set. We use this counter to prevent snapshots. We must increment it
* before walking the extents because we don't want a concurrent
* snapshot to run after we've already checked the extents.
+ *
+ * It is possible that subvolume is marked for deletion but still not
+ * removed yet. To prevent this race, we check the root status before
+ * activating the swapfile.
*/
+ spin_lock(&root->root_item_lock);
+ if (btrfs_root_dead(root)) {
+ spin_unlock(&root->root_item_lock);
+
+ btrfs_exclop_finish(fs_info);
+ btrfs_warn(fs_info,
+ "cannot activate swapfile because subvolume %llu is being deleted",
+ root->root_key.objectid);
+ return -EPERM;
+ }
atomic_inc(&root->nr_swapfiles);
+ spin_unlock(&root->root_item_lock);
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 238cee5b5254..be6c24577dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1239,7 +1239,7 @@ static u32 get_extent_max_capacity(const struct extent_map *em)
}
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
- bool locked)
+ u32 extent_thresh, u64 newer_than, bool locked)
{
struct extent_map *next;
bool ret = false;
@@ -1249,11 +1249,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
return false;
/*
- * We want to check if the next extent can be merged with the current
- * one, which can be an extent created in a past generation, so we pass
- * a minimum generation of 0 to defrag_lookup_extent().
+ * Here we need to pass @newer_then when checking the next extent, or
+ * we will hit a case we mark current extent for defrag, but the next
+ * one will not be a target.
+ * This will just cause extra IO without really reducing the fragments.
*/
- next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
+ next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
/* No more em or hole */
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
goto out;
@@ -1265,6 +1266,13 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
*/
if (next->len >= get_extent_max_capacity(em))
goto out;
+ /* Skip older extent */
+ if (next->generation < newer_than)
+ goto out;
+ /* Also check extent size */
+ if (next->len >= extent_thresh)
+ goto out;
+
ret = true;
out:
free_extent_map(next);
@@ -1470,7 +1478,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto next;
next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
- locked);
+ extent_thresh, newer_than, locked);
if (!next_mergeable) {
struct defrag_target_range *last;
@@ -5448,8 +5456,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_fs_info(fs_info, argp);
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(fs_info, argp);
- case BTRFS_IOC_BALANCE:
- return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_TREE_SEARCH:
return btrfs_ioctl_tree_search(inode, argp);
case BTRFS_IOC_TREE_SEARCH_V2:
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 1a6d2d5b4b33..1b31481f9e72 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -17,9 +17,11 @@ static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
struct prop_handler {
struct hlist_node node;
const char *xattr_name;
- int (*validate)(const char *value, size_t len);
+ int (*validate)(const struct btrfs_inode *inode, const char *value,
+ size_t len);
int (*apply)(struct inode *inode, const char *value, size_t len);
const char *(*extract)(struct inode *inode);
+ bool (*ignore)(const struct btrfs_inode *inode);
int inheritable;
};
@@ -55,7 +57,8 @@ find_prop_handler(const char *name,
return NULL;
}
-int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
+int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
+ const char *value, size_t value_len)
{
const struct prop_handler *handler;
@@ -69,7 +72,29 @@ int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
if (value_len == 0)
return 0;
- return handler->validate(value, value_len);
+ return handler->validate(inode, value, value_len);
+}
+
+/*
+ * Check if a property should be ignored (not set) for an inode.
+ *
+ * @inode: The target inode.
+ * @name: The property's name.
+ *
+ * The caller must be sure the given property name is valid, for example by
+ * having previously called btrfs_validate_prop().
+ *
+ * Returns: true if the property should be ignored for the given inode
+ * false if the property must not be ignored for the given inode
+ */
+bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name)
+{
+ const struct prop_handler *handler;
+
+ handler = find_prop_handler(name, NULL);
+ ASSERT(handler != NULL);
+
+ return handler->ignore(inode);
}
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
@@ -252,8 +277,12 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
return ret;
}
-static int prop_compression_validate(const char *value, size_t len)
+static int prop_compression_validate(const struct btrfs_inode *inode,
+ const char *value, size_t len)
{
+ if (!btrfs_inode_can_compress(inode))
+ return -EINVAL;
+
if (!value)
return 0;
@@ -310,6 +339,22 @@ static int prop_compression_apply(struct inode *inode, const char *value,
return 0;
}
+static bool prop_compression_ignore(const struct btrfs_inode *inode)
+{
+ /*
+ * Compression only has effect for regular files, and for directories
+ * we set it just to propagate it to new files created inside them.
+ * Everything else (symlinks, devices, sockets, fifos) is pointless as
+ * it will do nothing, so don't waste metadata space on a compression
+ * xattr for anything that is neither a file nor a directory.
+ */
+ if (!S_ISREG(inode->vfs_inode.i_mode) &&
+ !S_ISDIR(inode->vfs_inode.i_mode))
+ return true;
+
+ return false;
+}
+
static const char *prop_compression_extract(struct inode *inode)
{
switch (BTRFS_I(inode)->prop_compress) {
@@ -330,6 +375,7 @@ static struct prop_handler prop_handlers[] = {
.validate = prop_compression_validate,
.apply = prop_compression_apply,
.extract = prop_compression_extract,
+ .ignore = prop_compression_ignore,
.inheritable = 1
},
};
@@ -356,6 +402,9 @@ static int inherit_props(struct btrfs_trans_handle *trans,
if (!h->inheritable)
continue;
+ if (h->ignore(BTRFS_I(inode)))
+ continue;
+
value = h->extract(parent);
if (!value)
continue;
@@ -364,7 +413,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
* This is not strictly necessary as the property should be
* valid, but in case it isn't, don't propagate it further.
*/
- ret = h->validate(value, strlen(value));
+ ret = h->validate(BTRFS_I(inode), value, strlen(value));
if (ret)
continue;
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
index 40b2c65b518c..59bea741cfcf 100644
--- a/fs/btrfs/props.h
+++ b/fs/btrfs/props.h
@@ -13,7 +13,9 @@ void __init btrfs_props_init(void);
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
const char *name, const char *value, size_t value_len,
int flags);
-int btrfs_validate_prop(const char *name, const char *value, size_t value_len);
+int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
+ const char *value, size_t value_len);
+bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name);
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 11089568b287..8cd713d37ad2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3699,6 +3699,31 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache)
goto skip;
+ ASSERT(cache->start <= chunk_offset);
+ /*
+ * We are using the commit root to search for device extents, so
+ * that means we could have found a device extent item from a
+ * block group that was deleted in the current transaction. The
+ * logical start offset of the deleted block group, stored at
+ * @chunk_offset, might be part of the logical address range of
+ * a new block group (which uses different physical extents).
+ * In this case btrfs_lookup_block_group() has returned the new
+ * block group, and its start address is less than @chunk_offset.
+ *
+ * We skip such new block groups, because it's pointless to
+ * process them, as we won't find their extents because we search
+ * for them using the commit root of the extent tree. For a device
+ * replace it's also fine to skip it, we won't miss copying them
+ * to the target device because we have the write duplication
+ * setup through the regular write path (by btrfs_map_block()),
+ * and we have committed a transaction when we started the device
+ * replace, right after setting up the device replace state.
+ */
+ if (cache->start < chunk_offset) {
+ btrfs_put_block_group(cache);
+ goto skip;
+ }
+
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
spin_lock(&cache->lock);
if (!cache->to_copy) {
@@ -3822,7 +3847,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
dev_replace->item_needs_writeback = 1;
up_write(&dev_replace->rwsem);
- ASSERT(cache->start == chunk_offset);
ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
dev_extent_len);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 17389a42a3ab..ba78ca5aabbb 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -922,6 +922,9 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
case BTRFS_EXCLOP_BALANCE:
str = "balance\n";
break;
+ case BTRFS_EXCLOP_BALANCE_PAUSED:
+ str = "balance paused\n";
+ break;
case BTRFS_EXCLOP_DEV_ADD:
str = "device add\n";
break;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 571dae8ad65e..e65633686378 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3188,6 +3188,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
if (ret) {
mutex_unlock(&fs_info->tree_root->log_mutex);
+ blk_finish_plug(&plug);
goto out;
}
}
@@ -3720,11 +3721,29 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
key.offset = first_offset;
key.type = BTRFS_DIR_LOG_INDEX_KEY;
ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
- if (ret)
+ /*
+ * -EEXIST is fine and can happen sporadically when we are logging a
+ * directory and have concurrent insertions in the subvolume's tree for
+ * items from other inodes and that result in pushing off some dir items
+ * from one leaf to another in order to accommodate for the new items.
+ * This results in logging the same dir index range key.
+ */
+ if (ret && ret != -EEXIST)
return ret;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_dir_log_item);
+ if (ret == -EEXIST) {
+ const u64 curr_end = btrfs_dir_log_end(path->nodes[0], item);
+
+ /*
+ * btrfs_del_dir_entries_in_log() might have been called during
+ * an unlink between the initial insertion of this key and the
+ * current update, or we might be logging a single entry deletion
+ * during a rename, so set the new last_offset to the max value.
+ */
+ last_offset = max(last_offset, curr_end);
+ }
btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(path);
@@ -3848,13 +3867,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
ret = insert_dir_log_key(trans, log, dst_path,
ino, *last_old_dentry_offset + 1,
key.offset - 1);
- /*
- * -EEXIST should never happen because when we
- * log a directory in full mode (LOG_INODE_ALL)
- * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from
- * the log tree.
- */
- ASSERT(ret != -EEXIST);
if (ret < 0)
return ret;
}
@@ -5804,6 +5816,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
/*
+ * For symlinks, we must always log their content, which is stored in an
+ * inline extent, otherwise we could end up with an empty symlink after
+ * log replay, which is invalid on linux (symlink(2) returns -ENOENT if
+ * one attempts to create an empty symlink).
+ * We don't need to worry about flushing delalloc, because when we create
+ * the inline extent when the symlink is created (we never have delalloc
+ * for symlinks).
+ */
+ if (S_ISLNK(inode->vfs_inode.i_mode))
+ inode_only = LOG_INODE_ALL;
+
+ /*
* Before logging the inode item, cache the value returned by
* inode_logged(), because after that we have the need to figure out if
* the inode was previously logged in this transaction.
@@ -6181,7 +6205,7 @@ again:
}
ctx->log_new_dentries = false;
- if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
+ if (type == BTRFS_FT_DIR)
log_mode = LOG_INODE_ALL;
ret = btrfs_log_inode(trans, BTRFS_I(di_inode),
log_mode, ctx);
@@ -7018,12 +7042,12 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
/*
* Other concurrent task might be logging the old directory,
* as it can be triggered when logging other inode that had or
- * still has a dentry in the old directory. So take the old
- * directory's log_mutex to prevent getting an -EEXIST when
- * logging a key to record the deletion, or having that other
- * task logging the old directory get an -EEXIST if it attempts
- * to log the same key after we just did it. In both cases that
- * would result in falling back to a transaction commit.
+ * still has a dentry in the old directory. We lock the old
+ * directory's log_mutex to ensure the deletion of the old
+ * name is persisted, because during directory logging we
+ * delete all BTRFS_DIR_LOG_INDEX_KEY keys and the deletion of
+ * the old name's dir index item is in the delayed items, so
+ * it could be missed by an in progress directory logging.
*/
mutex_lock(&old_dir->log_mutex);
ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1be7cb2f955f..a8cc736731fd 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
path_put(&path);
}
-static int btrfs_rm_dev_item(struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device)
{
struct btrfs_root *root = device->fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_trans_handle *trans;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
@@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
if (ret) {
if (ret > 0)
ret = -ENOENT;
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
goto out;
}
ret = btrfs_del_item(trans, root, path);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- }
-
out:
btrfs_free_path(path);
- if (!ret)
- ret = btrfs_commit_transaction(trans);
return ret;
}
@@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
struct block_device **bdev, fmode_t *mode)
{
+ struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
if (ret)
- goto out;
+ return ret;
device = btrfs_find_device(fs_info->fs_devices, args);
if (!device) {
@@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = -ENOENT;
- goto out;
+ return ret;
}
if (btrfs_pinned_by_swapfile(fs_info, device)) {
btrfs_warn_in_rcu(fs_info,
"cannot remove device %s (devid %llu) due to active swapfile",
rcu_str_deref(device->name), device->devid);
- ret = -ETXTBSY;
- goto out;
+ return -ETXTBSY;
}
- if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
- ret = BTRFS_ERROR_DEV_TGT_REPLACE;
- goto out;
- }
+ if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
+ return BTRFS_ERROR_DEV_TGT_REPLACE;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
- fs_info->fs_devices->rw_devices == 1) {
- ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
- goto out;
- }
+ fs_info->fs_devices->rw_devices == 1)
+ return BTRFS_ERROR_DEV_ONLY_WRITABLE;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
@@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
if (ret)
goto error_undo;
- /*
- * TODO: the superblock still includes this device in its num_devices
- * counter although write_all_supers() is not locked out. This
- * could give a filesystem state which requires a degraded mount.
- */
- ret = btrfs_rm_dev_item(device);
- if (ret)
+ trans = btrfs_start_transaction(fs_info->chunk_root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
goto error_undo;
+ }
+
+ ret = btrfs_rm_dev_item(trans, device);
+ if (ret) {
+ /* Any error in dev item removal is critical */
+ btrfs_crit(fs_info,
+ "failed to remove device item for devid %llu: %d",
+ device->devid, ret);
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
btrfs_scrub_cancel_dev(device);
@@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
free_fs_devices(cur_devices);
}
-out:
+ ret = btrfs_commit_transaction(trans);
+
return ret;
error_undo:
@@ -2240,7 +2231,7 @@ error_undo:
device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex);
}
- goto out;
+ return ret;
}
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
@@ -4439,10 +4430,12 @@ static int balance_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
int ret = 0;
+ sb_start_write(fs_info->sb);
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl)
ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
mutex_unlock(&fs_info->balance_mutex);
+ sb_end_write(fs_info->sb);
return ret;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bd297f23d19e..f3e28f11cfb6 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -328,6 +328,9 @@ struct btrfs_fs_devices {
struct btrfs_bio {
unsigned int mirror_num;
+ /* for direct I/O */
+ u64 file_offset;
+
/* @device is for stripe IO submission. */
struct btrfs_device *device;
u8 *csum;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 99abf41b89b9..85691dc2232f 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -262,7 +262,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
out:
if (start_trans)
btrfs_end_transaction(trans);
@@ -403,10 +404,13 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
struct btrfs_root *root = BTRFS_I(inode)->root;
name = xattr_full_name(handler, name);
- ret = btrfs_validate_prop(name, value, size);
+ ret = btrfs_validate_prop(BTRFS_I(inode), name, value, size);
if (ret)
return ret;
+ if (btrfs_ignore_prop(BTRFS_I(inode), name))
+ return 0;
+
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -416,7 +420,8 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
}
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b7b5fac1c779..d31b0eda210f 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1801,7 +1801,6 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
map = em->map_lookup;
/* We only support single profile for now */
- ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev;
free_extent_map(em);
@@ -1836,6 +1835,12 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
+ /* No space left */
+ if (block_group->alloc_offset == block_group->zone_capacity) {
+ ret = false;
+ goto out_unlock;
+ }
+
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
@@ -1843,35 +1848,23 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
if (device->zone_info->max_active_zones == 0)
continue;
- /* No space left */
- if (block_group->alloc_offset == block_group->zone_capacity) {
- ret = false;
- goto out_unlock;
- }
-
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
goto out_unlock;
}
-
- /* Successfully activated all the zones */
- if (i == map->num_stripes - 1)
- block_group->zone_is_active = 1;
-
-
}
+
+ /* Successfully activated all the zones */
+ block_group->zone_is_active = 1;
spin_unlock(&block_group->lock);
- if (block_group->zone_is_active) {
- /* For the active block group list */
- btrfs_get_block_group(block_group);
+ /* For the active block group list */
+ btrfs_get_block_group(block_group);
- spin_lock(&fs_info->zone_active_bgs_lock);
- list_add_tail(&block_group->active_bg_list,
- &fs_info->zone_active_bgs);
- spin_unlock(&fs_info->zone_active_bgs_lock);
- }
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
return true;
@@ -1976,18 +1969,16 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
{
+ struct btrfs_fs_info *fs_info = fs_devices->fs_info;
struct btrfs_device *device;
bool ret = false;
- if (!btrfs_is_zoned(fs_devices->fs_info))
+ if (!btrfs_is_zoned(fs_info))
return true;
- /* Non-single profiles are not supported yet */
- ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
-
/* Check if there is a device with active zones left */
- mutex_lock(&fs_devices->device_list_mutex);
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ mutex_lock(&fs_info->chunk_mutex);
+ list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
struct btrfs_zoned_device_info *zinfo = device->zone_info;
if (!device->bdev)
@@ -1999,7 +1990,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
break;
}
}
- mutex_unlock(&fs_devices->device_list_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index cbf016a7bb5d..6dee76248cb4 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -359,7 +359,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
struct btrfs_root *root = inode->root;
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
- btrfs_inode_lock(&inode->vfs_inode, 0);
+ mutex_lock(&root->fs_info->zoned_data_reloc_io_lock);
}
static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
@@ -367,7 +367,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
struct btrfs_root *root = inode->root;
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
- btrfs_inode_unlock(&inode->vfs_inode, 0);
+ mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
}
#endif