summaryrefslogtreecommitdiff
path: root/fs/btrfs/free-space-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/free-space-tree.c')
-rw-r--r--fs/btrfs/free-space-tree.c1714
1 files changed, 1714 insertions, 0 deletions
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
new file mode 100644
index 000000000000..1ad2ad384b9e
--- /dev/null
+++ b/fs/btrfs/free-space-tree.c
@@ -0,0 +1,1714 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>
+#include "messages.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "locking.h"
+#include "free-space-tree.h"
+#include "transaction.h"
+#include "block-group.h"
+#include "fs.h"
+#include "accessors.h"
+#include "extent-tree.h"
+#include "root-tree.h"
+
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path);
+
+static struct btrfs_root *btrfs_free_space_root(
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+
+ if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
+ key.offset = block_group->global_root_id;
+ return btrfs_global_root(block_group->fs_info, &key);
+}
+
+void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
+{
+ u32 bitmap_range;
+ size_t bitmap_size;
+ u64 num_bitmaps, total_bitmap_size;
+
+ if (WARN_ON(cache->length == 0))
+ btrfs_warn(cache->fs_info, "block group %llu length is zero",
+ cache->start);
+
+ /*
+ * We convert to bitmaps when the disk space required for using extents
+ * exceeds that required for using bitmaps.
+ */
+ bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+ num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
+ bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
+ total_bitmap_size = num_bitmaps * bitmap_size;
+ cache->bitmap_high_thresh = div_u64(total_bitmap_size,
+ sizeof(struct btrfs_item));
+
+ /*
+ * We allow for a small buffer between the high threshold and low
+ * threshold to avoid thrashing back and forth between the two formats.
+ */
+ if (cache->bitmap_high_thresh > 100)
+ cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
+ else
+ cache->bitmap_low_thresh = 0;
+}
+
+static int add_new_free_space_info(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = block_group->start;
+ key.type = BTRFS_FREE_SPACE_INFO_KEY;
+ key.offset = block_group->length;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
+ if (ret)
+ return ret;
+
+ leaf = path->nodes[0];
+ info = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_info);
+ btrfs_set_free_space_extent_count(leaf, info, 0);
+ btrfs_set_free_space_flags(leaf, info, 0);
+ btrfs_release_path(path);
+ return 0;
+}
+
+EXPORT_FOR_TESTS
+struct btrfs_free_space_info *btrfs_search_free_space_info(
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, int cow)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_key key;
+ int ret;
+
+ key.objectid = block_group->start;
+ key.type = BTRFS_FREE_SPACE_INFO_KEY;
+ key.offset = block_group->length;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret != 0) {
+ btrfs_warn(fs_info, "missing free space info for %llu",
+ block_group->start);
+ DEBUG_WARN();
+ return ERR_PTR(-ENOENT);
+ }
+
+ return btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_free_space_info);
+}
+
+/*
+ * btrfs_search_slot() but we're looking for the greatest key less than the
+ * passed key.
+ */
+static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *key, struct btrfs_path *p,
+ int ins_len, int cow)
+{
+ int ret;
+
+ ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
+ if (ret < 0)
+ return ret;
+
+ if (unlikely(ret == 0)) {
+ DEBUG_WARN();
+ return -EIO;
+ }
+
+ if (unlikely(p->slots[0] == 0)) {
+ DEBUG_WARN("no previous slot found");
+ return -EIO;
+ }
+ p->slots[0]--;
+
+ return 0;
+}
+
+static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
+ u64 size)
+{
+ return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
+}
+
+static unsigned long *alloc_bitmap(u32 bitmap_size)
+{
+ unsigned long *ret;
+ unsigned int nofs_flag;
+ u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
+
+ /*
+ * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
+ * into the filesystem here. All callers hold a transaction handle
+ * open, so if a GFP_KERNEL allocation recurses into the filesystem
+ * and triggers a transaction commit, we would deadlock.
+ */
+ nofs_flag = memalloc_nofs_save();
+ ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
+ return ret;
+}
+
+static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+ u8 *p = ((u8 *)map) + BIT_BYTE(start);
+ const unsigned int size = start + len;
+ int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+ while (len - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~0;
+ p++;
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+EXPORT_FOR_TESTS
+int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+ unsigned long *bitmap;
+ char *bitmap_cursor;
+ u64 start, end;
+ u64 bitmap_range, i;
+ u32 bitmap_size, flags, expected_extent_count;
+ u32 extent_count = 0;
+ int done = 0, nr;
+ int ret;
+
+ bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
+ bitmap = alloc_bitmap(bitmap_size);
+ if (unlikely(!bitmap))
+ return 0;
+
+ start = block_group->start;
+ end = block_group->start + block_group->length;
+
+ key.objectid = end - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while (!done) {
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ nr = 0;
+ path->slots[0]++;
+ while (path->slots[0] > 0) {
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+ if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+ ASSERT(found_key.objectid == block_group->start);
+ ASSERT(found_key.offset == block_group->length);
+ done = 1;
+ break;
+ } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
+ u64 first, last;
+
+ ASSERT(found_key.objectid >= start);
+ ASSERT(found_key.objectid < end);
+ ASSERT(found_key.objectid + found_key.offset <= end);
+
+ first = div_u64(found_key.objectid - start,
+ fs_info->sectorsize);
+ last = div_u64(found_key.objectid + found_key.offset - start,
+ fs_info->sectorsize);
+ le_bitmap_set(bitmap, first, last - first);
+
+ extent_count++;
+ nr++;
+ path->slots[0]--;
+ } else {
+ ASSERT(0);
+ }
+ }
+
+ ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ btrfs_release_path(path);
+ }
+
+ info = btrfs_search_free_space_info(trans, block_group, path, 1);
+ if (IS_ERR(info)) {
+ ret = PTR_ERR(info);
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ leaf = path->nodes[0];
+ flags = btrfs_free_space_flags(leaf, info);
+ flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
+ block_group->using_free_space_bitmaps = true;
+ block_group->using_free_space_bitmaps_cached = true;
+ btrfs_set_free_space_flags(leaf, info, flags);
+ expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+ btrfs_release_path(path);
+
+ if (unlikely(extent_count != expected_extent_count)) {
+ btrfs_err(fs_info,
+ "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->start, extent_count,
+ expected_extent_count);
+ ret = -EIO;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ bitmap_cursor = (char *)bitmap;
+ bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+ i = start;
+ while (i < end) {
+ unsigned long ptr;
+ u64 extent_size;
+ u32 data_size;
+
+ extent_size = min(end - i, bitmap_range);
+ data_size = free_space_bitmap_size(fs_info, extent_size);
+
+ key.objectid = i;
+ key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
+ key.offset = extent_size;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ data_size);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ write_extent_buffer(leaf, bitmap_cursor, ptr,
+ data_size);
+ btrfs_release_path(path);
+
+ i += extent_size;
+ bitmap_cursor += data_size;
+ }
+
+ ret = 0;
+out:
+ kvfree(bitmap);
+ return ret;
+}
+
+EXPORT_FOR_TESTS
+int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+ unsigned long *bitmap;
+ u64 start, end;
+ u32 bitmap_size, flags, expected_extent_count;
+ unsigned long nrbits, start_bit, end_bit;
+ u32 extent_count = 0;
+ int done = 0, nr;
+ int ret;
+
+ bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
+ bitmap = alloc_bitmap(bitmap_size);
+ if (unlikely(!bitmap))
+ return 0;
+
+ start = block_group->start;
+ end = block_group->start + block_group->length;
+
+ key.objectid = end - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while (!done) {
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ nr = 0;
+ path->slots[0]++;
+ while (path->slots[0] > 0) {
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+ if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+ ASSERT(found_key.objectid == block_group->start);
+ ASSERT(found_key.offset == block_group->length);
+ done = 1;
+ break;
+ } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+ unsigned long ptr;
+ char *bitmap_cursor;
+ u32 bitmap_pos, data_size;
+
+ ASSERT(found_key.objectid >= start);
+ ASSERT(found_key.objectid < end);
+ ASSERT(found_key.objectid + found_key.offset <= end);
+
+ bitmap_pos = div_u64(found_key.objectid - start,
+ fs_info->sectorsize *
+ BITS_PER_BYTE);
+ bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+ data_size = free_space_bitmap_size(fs_info,
+ found_key.offset);
+
+ path->slots[0]--;
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ read_extent_buffer(leaf, bitmap_cursor, ptr,
+ data_size);
+
+ nr++;
+ } else {
+ ASSERT(0);
+ }
+ }
+
+ ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ btrfs_release_path(path);
+ }
+
+ info = btrfs_search_free_space_info(trans, block_group, path, 1);
+ if (IS_ERR(info)) {
+ ret = PTR_ERR(info);
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ leaf = path->nodes[0];
+ flags = btrfs_free_space_flags(leaf, info);
+ flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
+ block_group->using_free_space_bitmaps = false;
+ block_group->using_free_space_bitmaps_cached = true;
+ btrfs_set_free_space_flags(leaf, info, flags);
+ expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+ btrfs_release_path(path);
+
+ nrbits = block_group->length >> fs_info->sectorsize_bits;
+ start_bit = find_next_bit_le(bitmap, nrbits, 0);
+
+ while (start_bit < nrbits) {
+ end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
+ ASSERT(start_bit < end_bit);
+
+ key.objectid = start + start_bit * fs_info->sectorsize;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = (end_bit - start_bit) * fs_info->sectorsize;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ extent_count++;
+
+ start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
+ }
+
+ if (unlikely(extent_count != expected_extent_count)) {
+ btrfs_err(fs_info,
+ "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->start, extent_count,
+ expected_extent_count);
+ ret = -EIO;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ kvfree(bitmap);
+ return ret;
+}
+
+static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path,
+ int new_extents)
+{
+ struct btrfs_free_space_info *info;
+ u32 flags;
+ u32 extent_count;
+ int ret = 0;
+
+ if (new_extents == 0)
+ return 0;
+
+ info = btrfs_search_free_space_info(trans, block_group, path, 1);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ flags = btrfs_free_space_flags(path->nodes[0], info);
+ extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
+
+ extent_count += new_extents;
+ btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
+ btrfs_release_path(path);
+
+ if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
+ extent_count > block_group->bitmap_high_thresh) {
+ ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
+ } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
+ extent_count < block_group->bitmap_low_thresh) {
+ ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
+ }
+
+ return ret;
+}
+
+EXPORT_FOR_TESTS
+bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 offset)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 found_start, found_end;
+ unsigned long ptr, i;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(offset >= found_start && offset < found_end);
+
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ i = div_u64(offset - found_start,
+ block_group->fs_info->sectorsize);
+ return extent_buffer_test_bit(leaf, ptr, i);
+}
+
+static void free_space_modify_bits(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 *start, u64 *size,
+ bool set_bits)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 end = *start + *size;
+ u64 found_start, found_end;
+ unsigned long ptr, first, last;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(*start >= found_start && *start < found_end);
+ ASSERT(end > found_start);
+
+ if (end > found_end)
+ end = found_end;
+
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ first = (*start - found_start) >> fs_info->sectorsize_bits;
+ last = (end - found_start) >> fs_info->sectorsize_bits;
+ if (set_bits)
+ extent_buffer_bitmap_set(leaf, ptr, first, last - first);
+ else
+ extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
+ btrfs_mark_buffer_dirty(trans, leaf);
+
+ *size -= end - *start;
+ *start = end;
+}
+
+/*
+ * We can't use btrfs_next_item() in modify_free_space_bitmap() because
+ * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
+ * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
+ * looking for.
+ */
+static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *p)
+{
+ struct btrfs_key key;
+
+ if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
+ p->slots[0]++;
+ return 0;
+ }
+
+ btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
+ btrfs_release_path(p);
+
+ key.objectid += key.offset;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
+}
+
+/*
+ * If remove is 1, then we are removing free space, thus clearing bits in the
+ * bitmap. If remove is 0, then we are adding free space, thus setting bits in
+ * the bitmap.
+ */
+static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path,
+ u64 start, u64 size, bool remove)
+{
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_key key;
+ u64 end = start + size;
+ u64 cur_start, cur_size;
+ bool prev_bit_set = false;
+ bool next_bit_set = false;
+ int new_extents;
+ int ret;
+
+ /*
+ * Read the bit for the block immediately before the extent of space if
+ * that block is within the block group.
+ */
+ if (start > block_group->start) {
+ u64 prev_block = start - block_group->fs_info->sectorsize;
+
+ key.objectid = prev_block;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+ if (ret)
+ return ret;
+
+ prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
+
+ /* The previous block may have been in the previous bitmap. */
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (start >= key.objectid + key.offset) {
+ ret = free_space_next_bitmap(trans, root, path);
+ if (ret)
+ return ret;
+ }
+ } else {
+ key.objectid = start;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Iterate over all of the bitmaps overlapped by the extent of space,
+ * clearing/setting bits as required.
+ */
+ cur_start = start;
+ cur_size = size;
+ while (1) {
+ free_space_modify_bits(trans, block_group, path, &cur_start,
+ &cur_size, !remove);
+ if (cur_size == 0)
+ break;
+ ret = free_space_next_bitmap(trans, root, path);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Read the bit for the block immediately after the extent of space if
+ * that block is within the block group.
+ */
+ if (end < block_group->start + block_group->length) {
+ /* The next block may be in the next bitmap. */
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (end >= key.objectid + key.offset) {
+ ret = free_space_next_bitmap(trans, root, path);
+ if (ret)
+ return ret;
+ }
+
+ next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
+ }
+
+ if (remove) {
+ new_extents = -1;
+ if (prev_bit_set) {
+ /* Leftover on the left. */
+ new_extents++;
+ }
+ if (next_bit_set) {
+ /* Leftover on the right. */
+ new_extents++;
+ }
+ } else {
+ new_extents = 1;
+ if (prev_bit_set) {
+ /* Merging with neighbor on the left. */
+ new_extents--;
+ }
+ if (next_bit_set) {
+ /* Merging with neighbor on the right. */
+ new_extents--;
+ }
+ }
+
+ btrfs_release_path(path);
+ return update_free_space_extent_count(trans, block_group, path, new_extents);
+}
+
+static int remove_free_space_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path,
+ u64 start, u64 size)
+{
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_key key;
+ u64 found_start, found_end;
+ u64 end = start + size;
+ int new_extents = -1;
+ int ret;
+
+ key.objectid = start;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ return ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(start >= found_start && end <= found_end);
+
+ /*
+ * Okay, now that we've found the free space extent which contains the
+ * free space that we are removing, there are four cases:
+ *
+ * 1. We're using the whole extent: delete the key we found and
+ * decrement the free space extent count.
+ * 2. We are using part of the extent starting at the beginning: delete
+ * the key we found and insert a new key representing the leftover at
+ * the end. There is no net change in the number of extents.
+ * 3. We are using part of the extent ending at the end: delete the key
+ * we found and insert a new key representing the leftover at the
+ * beginning. There is no net change in the number of extents.
+ * 4. We are using part of the extent in the middle: delete the key we
+ * found and insert two new keys representing the leftovers on each
+ * side. Where we used to have one extent, we now have two, so increment
+ * the extent count. We may need to convert the block group to bitmaps
+ * as a result.
+ */
+
+ /* Delete the existing key (cases 1-4). */
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ return ret;
+
+ /* Add a key for leftovers at the beginning (cases 3 and 4). */
+ if (start > found_start) {
+ key.objectid = found_start;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = start - found_start;
+
+ btrfs_release_path(path);
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (ret)
+ return ret;
+ new_extents++;
+ }
+
+ /* Add a key for leftovers at the end (cases 2 and 4). */
+ if (end < found_end) {
+ key.objectid = end;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = found_end - end;
+
+ btrfs_release_path(path);
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (ret)
+ return ret;
+ new_extents++;
+ }
+
+ btrfs_release_path(path);
+ return update_free_space_extent_count(trans, block_group, path, new_extents);
+}
+
+static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
+{
+ struct btrfs_free_space_info *info;
+ u32 flags;
+
+ if (bg->using_free_space_bitmaps_cached)
+ return bg->using_free_space_bitmaps;
+
+ info = btrfs_search_free_space_info(NULL, bg, path, 0);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+ flags = btrfs_free_space_flags(path->nodes[0], info);
+ btrfs_release_path(path);
+
+ bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
+ bg->using_free_space_bitmaps_cached = true;
+
+ return bg->using_free_space_bitmaps;
+}
+
+EXPORT_FOR_TESTS
+int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 start, u64 size)
+{
+ int ret;
+
+ ret = __add_block_group_free_space(trans, block_group, path);
+ if (ret)
+ return ret;
+
+ ret = using_bitmaps(block_group, path);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ return modify_free_space_bitmap(trans, block_group, path,
+ start, size, true);
+
+ return remove_free_space_extent(trans, block_group, path, start, size);
+}
+
+int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ u64 start, u64 size)
+{
+ struct btrfs_block_group *block_group;
+ BTRFS_PATH_AUTO_FREE(path);
+ int ret;
+
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (unlikely(!path)) {
+ ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ block_group = btrfs_lookup_block_group(trans->fs_info, start);
+ if (unlikely(!block_group)) {
+ DEBUG_WARN("no block group found for start=%llu", start);
+ ret = -ENOENT;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ mutex_lock(&block_group->free_space_lock);
+ ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
+ mutex_unlock(&block_group->free_space_lock);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ btrfs_put_block_group(block_group);
+
+ return ret;
+}
+
+static int add_free_space_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path,
+ u64 start, u64 size)
+{
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ struct btrfs_key key, new_key;
+ u64 found_start, found_end;
+ u64 end = start + size;
+ int new_extents = 1;
+ int ret;
+
+ /*
+ * We are adding a new extent of free space, but we need to merge
+ * extents. There are four cases here:
+ *
+ * 1. The new extent does not have any immediate neighbors to merge
+ * with: add the new key and increment the free space extent count. We
+ * may need to convert the block group to bitmaps as a result.
+ * 2. The new extent has an immediate neighbor before it: remove the
+ * previous key and insert a new key combining both of them. There is no
+ * net change in the number of extents.
+ * 3. The new extent has an immediate neighbor after it: remove the next
+ * key and insert a new key combining both of them. There is no net
+ * change in the number of extents.
+ * 4. The new extent has immediate neighbors on both sides: remove both
+ * of the keys and insert a new key combining all of them. Where we used
+ * to have two extents, we now have one, so decrement the extent count.
+ */
+
+ new_key.objectid = start;
+ new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ new_key.offset = size;
+
+ /* Search for a neighbor on the left. */
+ if (start == block_group->start)
+ goto right;
+ key.objectid = start - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ return ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+ ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+ btrfs_release_path(path);
+ goto right;
+ }
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(found_start >= block_group->start &&
+ found_end > block_group->start);
+ ASSERT(found_start < start && found_end <= start);
+
+ /*
+ * Delete the neighbor on the left and absorb it into the new key (cases
+ * 2 and 4).
+ */
+ if (found_end == start) {
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ return ret;
+ new_key.objectid = found_start;
+ new_key.offset += key.offset;
+ new_extents--;
+ }
+ btrfs_release_path(path);
+
+right:
+ /* Search for a neighbor on the right. */
+ if (end == block_group->start + block_group->length)
+ goto insert;
+ key.objectid = end;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ return ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+ ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+ btrfs_release_path(path);
+ goto insert;
+ }
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(found_start >= block_group->start &&
+ found_end > block_group->start);
+ ASSERT((found_start < start && found_end <= start) ||
+ (found_start >= end && found_end > end));
+
+ /*
+ * Delete the neighbor on the right and absorb it into the new key
+ * (cases 3 and 4).
+ */
+ if (found_start == end) {
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ return ret;
+ new_key.offset += key.offset;
+ new_extents--;
+ }
+ btrfs_release_path(path);
+
+insert:
+ /* Insert the new key (cases 1-4). */
+ ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
+ if (ret)
+ return ret;
+
+ btrfs_release_path(path);
+ return update_free_space_extent_count(trans, block_group, path, new_extents);
+}
+
+EXPORT_FOR_TESTS
+int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 start, u64 size)
+{
+ int ret;
+
+ ret = __add_block_group_free_space(trans, block_group, path);
+ if (ret)
+ return ret;
+
+ ret = using_bitmaps(block_group, path);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ return modify_free_space_bitmap(trans, block_group, path,
+ start, size, false);
+
+ return add_free_space_extent(trans, block_group, path, start, size);
+}
+
+int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ u64 start, u64 size)
+{
+ struct btrfs_block_group *block_group;
+ BTRFS_PATH_AUTO_FREE(path);
+ int ret;
+
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (unlikely(!path)) {
+ ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ block_group = btrfs_lookup_block_group(trans->fs_info, start);
+ if (unlikely(!block_group)) {
+ DEBUG_WARN("no block group found for start=%llu", start);
+ ret = -ENOENT;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ mutex_lock(&block_group->free_space_lock);
+ ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
+ mutex_unlock(&block_group->free_space_lock);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ btrfs_put_block_group(block_group);
+
+ return ret;
+}
+
+/*
+ * Populate the free space tree by walking the extent tree. Operations on the
+ * extent tree that happen as a result of writes to the free space tree will go
+ * through the normal add/remove hooks.
+ */
+static int populate_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_root *extent_root;
+ BTRFS_PATH_AUTO_FREE(path);
+ BTRFS_PATH_AUTO_FREE(path2);
+ struct btrfs_key key;
+ u64 start, end;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path2 = btrfs_alloc_path();
+ if (!path2)
+ return -ENOMEM;
+
+ path->reada = READA_FORWARD;
+
+ ret = add_new_free_space_info(trans, block_group, path2);
+ if (ret)
+ return ret;
+
+ mutex_lock(&block_group->free_space_lock);
+
+ /*
+ * Iterate through all of the extent and metadata items in this block
+ * group, adding the free space between them and the free space at the
+ * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
+ * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
+ * contained in.
+ */
+ key.objectid = block_group->start;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = 0;
+
+ extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
+ ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
+ if (ret < 0)
+ goto out_locked;
+ /*
+ * If ret is 1 (no key found), it means this is an empty block group,
+ * without any extents allocated from it and there's no block group
+ * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
+ * because we are using the block group tree feature (so block group
+ * items are stored in the block group tree) or this is a new block
+ * group created in the current transaction and its block group item
+ * was not yet inserted in the extent tree (that happens in
+ * btrfs_create_pending_block_groups() -> insert_block_group_item()).
+ * It also means there are no extents allocated for block groups with a
+ * start offset beyond this block group's end offset (this is the last,
+ * highest, block group).
+ */
+ start = block_group->start;
+ end = block_group->start + block_group->length;
+ while (ret == 0) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY) {
+ if (key.objectid >= end)
+ break;
+
+ if (start < key.objectid) {
+ ret = __btrfs_add_to_free_space_tree(trans,
+ block_group,
+ path2, start,
+ key.objectid -
+ start);
+ if (ret)
+ goto out_locked;
+ }
+ start = key.objectid;
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ start += trans->fs_info->nodesize;
+ else
+ start += key.offset;
+ } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ if (key.objectid != block_group->start)
+ break;
+ }
+
+ ret = btrfs_next_item(extent_root, path);
+ if (ret < 0)
+ goto out_locked;
+ }
+ if (start < end) {
+ ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
+ start, end - start);
+ if (ret)
+ goto out_locked;
+ }
+
+ ret = 0;
+out_locked:
+ mutex_unlock(&block_group->free_space_lock);
+
+ return ret;
+}
+
+int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *free_space_root;
+ struct btrfs_block_group *block_group;
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(tree_root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ free_space_root = btrfs_create_tree(trans,
+ BTRFS_FREE_SPACE_TREE_OBJECTID);
+ if (IS_ERR(free_space_root)) {
+ ret = PTR_ERR(free_space_root);
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
+ }
+ ret = btrfs_global_root_insert(free_space_root);
+ if (unlikely(ret)) {
+ btrfs_put_root(free_space_root);
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
+ }
+
+ node = rb_first_cached(&fs_info->block_group_cache_tree);
+ while (node) {
+ block_group = rb_entry(node, struct btrfs_block_group,
+ cache_node);
+ ret = populate_free_space_tree(trans, block_group);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
+ }
+ node = rb_next(node);
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ ret = btrfs_commit_transaction(trans);
+
+ /*
+ * Now that we've committed the transaction any reading of our commit
+ * root will be safe, so we can cache from the free space tree now.
+ */
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+
+out_clear:
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+}
+
+static int clear_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ BTRFS_PATH_AUTO_FREE(path);
+ struct btrfs_key key;
+ struct rb_node *node;
+ int nr;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = 0;
+ key.type = 0;
+ key.offset = 0;
+
+ while (1) {
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0)
+ return ret;
+
+ nr = btrfs_header_nritems(path->nodes[0]);
+ if (!nr)
+ break;
+
+ path->slots[0] = 0;
+ ret = btrfs_del_items(trans, root, path, 0, nr);
+ if (ret)
+ return ret;
+
+ btrfs_release_path(path);
+ }
+
+ node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *bg;
+
+ bg = rb_entry(node, struct btrfs_block_group, cache_node);
+ clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
+ node = rb_next(node);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+ struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+ int ret;
+
+ trans = btrfs_start_transaction(tree_root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
+
+ ret = btrfs_del_root(trans, &free_space_root->root_key);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
+
+ btrfs_global_root_delete(free_space_root);
+
+ spin_lock(&fs_info->trans_lock);
+ list_del(&free_space_root->dirty_list);
+ spin_unlock(&fs_info->trans_lock);
+
+ btrfs_tree_lock(free_space_root->node);
+ btrfs_clear_buffer_dirty(trans, free_space_root->node);
+ btrfs_tree_unlock(free_space_root->node);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
+ free_space_root->node, 0, 1);
+ btrfs_put_root(free_space_root);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
+
+ return btrfs_commit_transaction(trans);
+}
+
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+ struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(free_space_root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
+
+ node = rb_first_cached(&fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *block_group;
+
+ block_group = rb_entry(node, struct btrfs_block_group,
+ cache_node);
+
+ if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
+ &block_group->runtime_flags))
+ goto next;
+
+ ret = populate_free_space_tree(trans, block_group);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
+next:
+ if (btrfs_should_end_transaction(trans)) {
+ btrfs_end_transaction(trans);
+ trans = btrfs_start_transaction(free_space_root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ }
+ node = rb_next(node);
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+
+ ret = btrfs_commit_transaction(trans);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+}
+
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path)
+{
+ bool own_path = false;
+ int ret;
+
+ if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ &block_group->runtime_flags))
+ return 0;
+
+ /*
+ * While rebuilding the free space tree we may allocate new metadata
+ * block groups while modifying the free space tree.
+ *
+ * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
+ * can use multiple transactions, every time btrfs_end_transaction() is
+ * called at btrfs_rebuild_free_space_tree() we finish the creation of
+ * new block groups by calling btrfs_create_pending_block_groups(), and
+ * that in turn calls us, through add_block_group_free_space(), to add
+ * a free space info item and a free space extent item for the block
+ * group.
+ *
+ * Then later btrfs_rebuild_free_space_tree() may find such new block
+ * groups and processes them with populate_free_space_tree(), which can
+ * fail with EEXIST since there are already items for the block group in
+ * the free space tree. Notice that we say "may find" because a new
+ * block group may be added to the block groups rbtree in a node before
+ * or after the block group currently being processed by the rebuild
+ * process. So signal the rebuild process to skip such new block groups
+ * if it finds them.
+ */
+ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (unlikely(!path)) {
+ btrfs_abort_transaction(trans, -ENOMEM);
+ return -ENOMEM;
+ }
+ own_path = true;
+ }
+
+ ret = add_new_free_space_info(trans, block_group, path);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
+ block_group->start, block_group->length);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+out:
+ if (own_path)
+ btrfs_free_path(path);
+
+ return ret;
+}
+
+int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
+{
+ int ret;
+
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
+ return 0;
+
+ mutex_lock(&block_group->free_space_lock);
+ ret = __add_block_group_free_space(trans, block_group, NULL);
+ mutex_unlock(&block_group->free_space_lock);
+ return ret;
+}
+
+int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_root *root = btrfs_free_space_root(block_group);
+ BTRFS_PATH_AUTO_FREE(path);
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+ u64 start, end;
+ int done = 0, nr;
+ int ret;
+
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
+ return 0;
+
+ if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
+ /* We never added this block group to the free space tree. */
+ return 0;
+ }
+
+ path = btrfs_alloc_path();
+ if (unlikely(!path)) {
+ ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ start = block_group->start;
+ end = block_group->start + block_group->length;
+
+ key.objectid = end - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while (!done) {
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ leaf = path->nodes[0];
+ nr = 0;
+ path->slots[0]++;
+ while (path->slots[0] > 0) {
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+ if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+ ASSERT(found_key.objectid == block_group->start);
+ ASSERT(found_key.offset == block_group->length);
+ done = 1;
+ nr++;
+ path->slots[0]--;
+ break;
+ } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
+ found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+ ASSERT(found_key.objectid >= start);
+ ASSERT(found_key.objectid < end);
+ ASSERT(found_key.objectid + found_key.offset <= end);
+ nr++;
+ path->slots[0]--;
+ } else {
+ ASSERT(0);
+ }
+ }
+
+ ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ btrfs_release_path(path);
+ }
+
+ ret = 0;
+
+ return ret;
+}
+
+static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
+ struct btrfs_path *path,
+ u32 expected_extent_count)
+{
+ struct btrfs_block_group *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ bool prev_bit_set = false;
+ /* Initialize to silence GCC. */
+ u64 extent_start = 0;
+ u64 end, offset;
+ u64 total_found = 0;
+ u32 extent_count = 0;
+ int ret;
+
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ root = btrfs_free_space_root(block_group);
+
+ end = block_group->start + block_group->length;
+
+ while (1) {
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ break;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+ break;
+
+ ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+ ASSERT(key.objectid < end && key.objectid + key.offset <= end);
+
+ offset = key.objectid;
+ while (offset < key.objectid + key.offset) {
+ bool bit_set;
+
+ bit_set = btrfs_free_space_test_bit(block_group, path, offset);
+ if (!prev_bit_set && bit_set) {
+ extent_start = offset;
+ } else if (prev_bit_set && !bit_set) {
+ u64 space_added;
+
+ ret = btrfs_add_new_free_space(block_group,
+ extent_start,
+ offset,
+ &space_added);
+ if (ret)
+ return ret;
+ total_found += space_added;
+ if (total_found > CACHING_CTL_WAKE_UP) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
+ extent_count++;
+ }
+ prev_bit_set = bit_set;
+ offset += fs_info->sectorsize;
+ }
+ }
+ if (prev_bit_set) {
+ ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
+ if (ret)
+ return ret;
+ extent_count++;
+ }
+
+ if (unlikely(extent_count != expected_extent_count)) {
+ btrfs_err(fs_info,
+ "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->start, extent_count,
+ expected_extent_count);
+ DEBUG_WARN();
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
+ struct btrfs_path *path,
+ u32 expected_extent_count)
+{
+ struct btrfs_block_group *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ u64 end;
+ u64 total_found = 0;
+ u32 extent_count = 0;
+ int ret;
+
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ root = btrfs_free_space_root(block_group);
+
+ end = block_group->start + block_group->length;
+
+ while (1) {
+ u64 space_added;
+
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ break;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+ break;
+
+ ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+ ASSERT(key.objectid < end && key.objectid + key.offset <= end);
+
+ ret = btrfs_add_new_free_space(block_group, key.objectid,
+ key.objectid + key.offset,
+ &space_added);
+ if (ret)
+ return ret;
+ total_found += space_added;
+ if (total_found > CACHING_CTL_WAKE_UP) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
+ extent_count++;
+ }
+
+ if (unlikely(extent_count != expected_extent_count)) {
+ btrfs_err(fs_info,
+ "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->start, extent_count,
+ expected_extent_count);
+ DEBUG_WARN();
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
+{
+ struct btrfs_block_group *block_group;
+ struct btrfs_free_space_info *info;
+ BTRFS_PATH_AUTO_FREE(path);
+ u32 extent_count, flags;
+
+ block_group = caching_ctl->block_group;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * Just like caching_thread() doesn't want to deadlock on the extent
+ * tree, we don't want to deadlock on the free space tree.
+ */
+ path->skip_locking = true;
+ path->search_commit_root = true;
+ path->reada = READA_FORWARD;
+
+ info = btrfs_search_free_space_info(NULL, block_group, path, 0);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
+ flags = btrfs_free_space_flags(path->nodes[0], info);
+
+ /*
+ * We left path pointing to the free space info item, so now
+ * load_free_space_foo can just iterate through the free space tree from
+ * there.
+ */
+ if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
+ return load_free_space_bitmaps(caching_ctl, path, extent_count);
+ else
+ return load_free_space_extents(caching_ctl, path, extent_count);
+}