summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-io-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-io-tree.c')
-rw-r--r--fs/btrfs/extent-io-tree.c272
1 files changed, 180 insertions, 92 deletions
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index ff8e117a1ace..ea149be28dff 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -105,32 +105,40 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
lockdep_set_class(&tree->lock, &file_extent_tree_class);
}
+/*
+ * Empty an io tree, removing and freeing every extent state record from the
+ * tree. This should be called once we are sure no other task can access the
+ * tree anymore, so no tree updates happen after we empty the tree and there
+ * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
+ * set on any extent state when calling this function).
+ */
void extent_io_tree_release(struct extent_io_tree *tree)
{
+ struct rb_root root;
+ struct extent_state *state;
+ struct extent_state *tmp;
+
spin_lock(&tree->lock);
- /*
- * Do a single barrier for the waitqueue_active check here, the state
- * of the waitqueue should not change once extent_io_tree_release is
- * called.
- */
- smp_mb();
- while (!RB_EMPTY_ROOT(&tree->state)) {
- struct rb_node *node;
- struct extent_state *state;
-
- node = rb_first(&tree->state);
- state = rb_entry(node, struct extent_state, rb_node);
- rb_erase(&state->rb_node, &tree->state);
+ root = tree->state;
+ tree->state = RB_ROOT;
+ rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
+ /* Clear node to keep free_extent_state() happy. */
RB_CLEAR_NODE(&state->rb_node);
+ ASSERT(!(state->state & EXTENT_LOCKED));
/*
- * btree io trees aren't supposed to have tasks waiting for
- * changes in the flags of extent states ever.
+ * No need for a memory barrier here, as we are holding the tree
+ * lock and we only change the waitqueue while holding that lock
+ * (see wait_extent_bit()).
*/
ASSERT(!waitqueue_active(&state->wq));
free_extent_state(state);
-
cond_resched_lock(&tree->lock);
}
+ /*
+ * Should still be empty even after a reschedule, no other task should
+ * be accessing the tree anymore.
+ */
+ ASSERT(RB_EMPTY_ROOT(&tree->state));
spin_unlock(&tree->lock);
}
@@ -327,6 +335,36 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
"locking error: extent tree was modified by another thread while locked");
}
+static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
+{
+ struct extent_state *prev;
+
+ prev = prev_state(state);
+ if (prev && prev->end == state->start - 1 && prev->state == state->state) {
+ if (tree->inode)
+ btrfs_merge_delalloc_extent(tree->inode, state, prev);
+ state->start = prev->start;
+ rb_erase(&prev->rb_node, &tree->state);
+ RB_CLEAR_NODE(&prev->rb_node);
+ free_extent_state(prev);
+ }
+}
+
+static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
+{
+ struct extent_state *next;
+
+ next = next_state(state);
+ if (next && next->start == state->end + 1 && next->state == state->state) {
+ if (tree->inode)
+ btrfs_merge_delalloc_extent(tree->inode, state, next);
+ state->end = next->end;
+ rb_erase(&next->rb_node, &tree->state);
+ RB_CLEAR_NODE(&next->rb_node);
+ free_extent_state(next);
+ }
+}
+
/*
* Utility function to look for merge candidates inside a given range. Any
* extents with matching state are merged together into a single extent in the
@@ -338,31 +376,11 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
*/
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
- struct extent_state *other;
-
if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
return;
- other = prev_state(state);
- if (other && other->end == state->start - 1 &&
- other->state == state->state) {
- if (tree->inode)
- btrfs_merge_delalloc_extent(tree->inode, state, other);
- state->start = other->start;
- rb_erase(&other->rb_node, &tree->state);
- RB_CLEAR_NODE(&other->rb_node);
- free_extent_state(other);
- }
- other = next_state(state);
- if (other && other->start == state->end + 1 &&
- other->state == state->state) {
- if (tree->inode)
- btrfs_merge_delalloc_extent(tree->inode, state, other);
- state->end = other->end;
- rb_erase(&other->rb_node, &tree->state);
- RB_CLEAR_NODE(&other->rb_node);
- free_extent_state(other);
- }
+ merge_prev_state(tree, state);
+ merge_next_state(tree, state);
}
static void set_state_bits(struct extent_io_tree *tree,
@@ -384,19 +402,27 @@ static void set_state_bits(struct extent_io_tree *tree,
* Insert an extent_state struct into the tree. 'bits' are set on the
* struct before it is inserted.
*
- * This may return -EEXIST if the extent is already there, in which case the
- * state struct is freed.
+ * Returns a pointer to the struct extent_state record containing the range
+ * requested for insertion, which may be the same as the given struct or it
+ * may be an existing record in the tree that was expanded to accommodate the
+ * requested range. In case of an extent_state different from the one that was
+ * given, the later can be freed or reused by the caller.
+ *
+ * On error it returns an error pointer.
*
* The tree lock is not taken internally. This is a utility function and
* probably isn't what you want to call (see set/clear_extent_bit).
*/
-static int insert_state(struct extent_io_tree *tree,
- struct extent_state *state,
- u32 bits, struct extent_changeset *changeset)
+static struct extent_state *insert_state(struct extent_io_tree *tree,
+ struct extent_state *state,
+ u32 bits,
+ struct extent_changeset *changeset)
{
struct rb_node **node;
struct rb_node *parent = NULL;
- const u64 end = state->end;
+ const u64 start = state->start - 1;
+ const u64 end = state->end + 1;
+ const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY));
set_state_bits(tree, state, bits, changeset);
@@ -407,23 +433,42 @@ static int insert_state(struct extent_io_tree *tree,
parent = *node;
entry = rb_entry(parent, struct extent_state, rb_node);
- if (end < entry->start) {
+ if (state->end < entry->start) {
+ if (try_merge && end == entry->start &&
+ state->state == entry->state) {
+ if (tree->inode)
+ btrfs_merge_delalloc_extent(tree->inode,
+ state, entry);
+ entry->start = state->start;
+ merge_prev_state(tree, entry);
+ state->state = 0;
+ return entry;
+ }
node = &(*node)->rb_left;
- } else if (end > entry->end) {
+ } else if (state->end > entry->end) {
+ if (try_merge && entry->end == start &&
+ state->state == entry->state) {
+ if (tree->inode)
+ btrfs_merge_delalloc_extent(tree->inode,
+ state, entry);
+ entry->end = state->end;
+ merge_next_state(tree, entry);
+ state->state = 0;
+ return entry;
+ }
node = &(*node)->rb_right;
} else {
btrfs_err(tree->fs_info,
"found node %llu %llu on insert of %llu %llu",
- entry->start, entry->end, state->start, end);
- return -EEXIST;
+ entry->start, entry->end, state->start, state->end);
+ return ERR_PTR(-EEXIST);
}
}
rb_link_node(&state->rb_node, parent, node);
rb_insert_color(&state->rb_node, &tree->state);
- merge_state(tree, state);
- return 0;
+ return state;
}
/*
@@ -708,26 +753,13 @@ out:
}
-static void wait_on_state(struct extent_io_tree *tree,
- struct extent_state *state)
- __releases(tree->lock)
- __acquires(tree->lock)
-{
- DEFINE_WAIT(wait);
- prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&tree->lock);
- schedule();
- spin_lock(&tree->lock);
- finish_wait(&state->wq, &wait);
-}
-
/*
* Wait for one or more bits to clear on a range in the state tree.
* The range [start, end] is inclusive.
* The tree lock is taken by this function
*/
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
- struct extent_state **cached_state)
+static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, struct extent_state **cached_state)
{
struct extent_state *state;
@@ -758,9 +790,15 @@ process_node:
goto out;
if (state->state & bits) {
+ DEFINE_WAIT(wait);
+
start = state->start;
refcount_inc(&state->refs);
- wait_on_state(tree, state);
+ prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&tree->lock);
+ schedule();
+ spin_lock(&tree->lock);
+ finish_wait(&state->wq, &wait);
free_extent_state(state);
goto again;
}
@@ -847,10 +885,19 @@ bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
if (state->end == start - 1 && extent_state_in_tree(state)) {
while ((state = next_state(state)) != NULL) {
if (state->state & bits)
- goto got_it;
+ break;
}
+ /*
+ * If we found the next extent state, clear cached_state
+ * so that we can cache the next extent state below and
+ * avoid future calls going over the same extent state
+ * again. If we haven't found any, clear as well since
+ * it's now useless.
+ */
free_extent_state(*cached_state);
*cached_state = NULL;
+ if (state)
+ goto got_it;
goto out;
}
free_extent_state(*cached_state);
@@ -1133,6 +1180,8 @@ hit_next:
*/
if (state->start > start) {
u64 this_end;
+ struct extent_state *inserted_state;
+
if (end < last_start)
this_end = end;
else
@@ -1148,12 +1197,15 @@ hit_next:
*/
prealloc->start = start;
prealloc->end = this_end;
- err = insert_state(tree, prealloc, bits, changeset);
- if (err)
+ inserted_state = insert_state(tree, prealloc, bits, changeset);
+ if (IS_ERR(inserted_state)) {
+ err = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, err);
+ }
- cache_state(prealloc, cached_state);
- prealloc = NULL;
+ cache_state(inserted_state, cached_state);
+ if (inserted_state == prealloc)
+ prealloc = NULL;
start = this_end + 1;
goto search_again;
}
@@ -1356,6 +1408,8 @@ hit_next:
*/
if (state->start > start) {
u64 this_end;
+ struct extent_state *inserted_state;
+
if (end < last_start)
this_end = end;
else
@@ -1373,11 +1427,14 @@ hit_next:
*/
prealloc->start = start;
prealloc->end = this_end;
- err = insert_state(tree, prealloc, bits, NULL);
- if (err)
+ inserted_state = insert_state(tree, prealloc, bits, NULL);
+ if (IS_ERR(inserted_state)) {
+ err = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, err);
- cache_state(prealloc, cached_state);
- prealloc = NULL;
+ }
+ cache_state(inserted_state, cached_state);
+ if (inserted_state == prealloc)
+ prealloc = NULL;
start = this_end + 1;
goto search_again;
}
@@ -1640,15 +1697,46 @@ search:
}
/*
- * Search a range in the state tree for a given mask. If 'filled' == 1, this
- * returns 1 only if every extent in the tree has the bits set. Otherwise, 1
- * is returned if any bit in the range is found set.
+ * Check if the single @bit exists in the given range.
+ */
+bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
+{
+ struct extent_state *state = NULL;
+ bool bitset = false;
+
+ ASSERT(is_power_of_2(bit));
+
+ spin_lock(&tree->lock);
+ state = tree_search(tree, start);
+ while (state && start <= end) {
+ if (state->start > end)
+ break;
+
+ if (state->state & bit) {
+ bitset = true;
+ break;
+ }
+
+ /* If state->end is (u64)-1, start will overflow to 0 */
+ start = state->end + 1;
+ if (start > end || start == 0)
+ break;
+ state = next_state(state);
+ }
+ spin_unlock(&tree->lock);
+ return bitset;
+}
+
+/*
+ * Check if the whole range [@start,@end) contains the single @bit set.
*/
-int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, int filled, struct extent_state *cached)
+bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
+ struct extent_state *cached)
{
struct extent_state *state = NULL;
- int bitset = 0;
+ bool bitset = true;
+
+ ASSERT(is_power_of_2(bit));
spin_lock(&tree->lock);
if (cached && extent_state_in_tree(cached) && cached->start <= start &&
@@ -1657,35 +1745,35 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
else
state = tree_search(tree, start);
while (state && start <= end) {
- if (filled && state->start > start) {
- bitset = 0;
+ if (state->start > start) {
+ bitset = false;
break;
}
if (state->start > end)
break;
- if (state->state & bits) {
- bitset = 1;
- if (!filled)
- break;
- } else if (filled) {
- bitset = 0;
+ if ((state->state & bit) == 0) {
+ bitset = false;
break;
}
if (state->end == (u64)-1)
break;
+ /*
+ * Last entry (if state->end is (u64)-1 and overflow happens),
+ * or next entry starts after the range.
+ */
start = state->end + 1;
- if (start > end)
+ if (start > end || start == 0)
break;
state = next_state(state);
}
/* We ran out of states and were still inside of our range. */
- if (filled && !state)
- bitset = 0;
+ if (!state)
+ bitset = false;
spin_unlock(&tree->lock);
return bitset;
}