Diffstat (limited to 'fs/btrfs/extent-io-tree.c')
-rw-r--r--  fs/btrfs/extent-io-tree.c | 1959
1 file changed, 1959 insertions, 0 deletions
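Before the diff body, a quick illustration of the API this patch adds: the file implements btrfs's "extent io tree", an rbtree of inclusive byte ranges ("extent states"), each carrying a set of bit flags. The sketch below is illustrative only and not part of the change; demo_extent_io_tree() is a hypothetical name, and IO_TREE_FS_EXCLUDED_EXTENTS, EXTENT_DIRTY, SZ_64K and the btrfs_clear_extent_bit() wrapper are assumed to come from the existing btrfs and kernel headers.

	static void demo_extent_io_tree(struct btrfs_fs_info *fs_info)
	{
		struct extent_io_tree tree;
		struct extent_state *cached = NULL;
		u64 found_start, found_end;

		btrfs_extent_io_tree_init(fs_info, &tree, IO_TREE_FS_EXCLUDED_EXTENTS);

		/* Ranges are inclusive: mark [0, 64K - 1] dirty. */
		btrfs_set_extent_bit(&tree, 0, SZ_64K - 1, EXTENT_DIRTY, &cached);

		/* Find the first range at or after offset 0 with EXTENT_DIRTY set. */
		if (btrfs_find_first_extent_bit(&tree, 0, &found_start, &found_end,
						EXTENT_DIRTY, &cached)) {
			/* Here found_start == 0 and found_end == SZ_64K - 1. */
		}

		btrfs_clear_extent_bit(&tree, 0, SZ_64K - 1, EXTENT_DIRTY, &cached);
		btrfs_free_extent_state(cached);	/* drop our cached reference */
		btrfs_extent_io_tree_release(&tree);
	}
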
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c new file mode 100644 index 000000000000..bb2ca1c9c7b0 --- /dev/null +++ b/fs/btrfs/extent-io-tree.c @@ -0,0 +1,1959 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/slab.h> +#include <trace/events/btrfs.h> +#include "messages.h" +#include "ctree.h" +#include "extent_io.h" +#include "extent-io-tree.h" +#include "btrfs_inode.h" + +static struct kmem_cache *extent_state_cache; + +static inline bool extent_state_in_tree(const struct extent_state *state) +{ + return !RB_EMPTY_NODE(&state->rb_node); +} + +#ifdef CONFIG_BTRFS_DEBUG +static LIST_HEAD(states); +static DEFINE_SPINLOCK(leak_lock); + +static inline void btrfs_leak_debug_add_state(struct extent_state *state) +{ + unsigned long flags; + + spin_lock_irqsave(&leak_lock, flags); + list_add(&state->leak_list, &states); + spin_unlock_irqrestore(&leak_lock, flags); +} + +static inline void btrfs_leak_debug_del_state(struct extent_state *state) +{ + unsigned long flags; + + spin_lock_irqsave(&leak_lock, flags); + list_del(&state->leak_list); + spin_unlock_irqrestore(&leak_lock, flags); +} + +static inline void btrfs_extent_state_leak_debug_check(void) +{ + struct extent_state *state; + + while (!list_empty(&states)) { + state = list_first_entry(&states, struct extent_state, leak_list); + btrfs_err(NULL, + "state leak: start %llu end %llu state %u in tree %d refs %d", + state->start, state->end, state->state, + extent_state_in_tree(state), + refcount_read(&state->refs)); + list_del(&state->leak_list); + WARN_ON_ONCE(1); + kmem_cache_free(extent_state_cache, state); + } +} + +#define btrfs_debug_check_extent_io_range(tree, start, end) \ + __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end)) +static inline void __btrfs_debug_check_extent_io_range(const char *caller, + struct extent_io_tree *tree, + u64 start, u64 end) +{ + const struct btrfs_inode *inode = tree->inode; + u64 isize; + + if (tree->owner != IO_TREE_INODE_IO) + return; + + isize = i_size_read(&inode->vfs_inode); + if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { + btrfs_debug_rl(inode->root->fs_info, + "%s: ino %llu isize %llu odd range [%llu,%llu]", + caller, btrfs_ino(inode), isize, start, end); + } +} +#else +#define btrfs_leak_debug_add_state(state) do {} while (0) +#define btrfs_leak_debug_del_state(state) do {} while (0) +#define btrfs_extent_state_leak_debug_check() do {} while (0) +#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) +#endif + +/* Read-only access to the inode. */ +const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree) +{ + if (tree->owner == IO_TREE_INODE_IO) + return tree->inode; + return NULL; +} + +/* For read-only access to fs_info. */ +const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree) +{ + if (tree->owner == IO_TREE_INODE_IO) + return tree->inode->root->fs_info; + return tree->fs_info; +} + +void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info, + struct extent_io_tree *tree, unsigned int owner) +{ + tree->state = RB_ROOT; + spin_lock_init(&tree->lock); + tree->fs_info = fs_info; + tree->owner = owner; +} + +/* + * Empty an io tree, removing and freeing every extent state record from the + * tree. 
This should be called once we are sure no other task can access the + * tree anymore, so no tree updates happen after we empty the tree and there + * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never + * set on any extent state when calling this function). + */ +void btrfs_extent_io_tree_release(struct extent_io_tree *tree) +{ + struct rb_root root; + struct extent_state *state; + struct extent_state *tmp; + + spin_lock(&tree->lock); + root = tree->state; + tree->state = RB_ROOT; + rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) { + /* Clear node to keep free_extent_state() happy. */ + RB_CLEAR_NODE(&state->rb_node); + ASSERT(!(state->state & EXTENT_LOCK_BITS)); + /* + * No need for a memory barrier here, as we are holding the tree + * lock and we only change the waitqueue while holding that lock + * (see wait_extent_bit()). + */ + ASSERT(!waitqueue_active(&state->wq)); + btrfs_free_extent_state(state); + cond_resched_lock(&tree->lock); + } + /* + * Should still be empty even after a reschedule, as no other task + * should be accessing the tree anymore. + */ + ASSERT(RB_EMPTY_ROOT(&tree->state)); + spin_unlock(&tree->lock); +} + +static struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + + /* + * The given mask might not be appropriate for the slab allocator, + * drop the unsupported bits. + */ + mask &= ~(__GFP_DMA32|__GFP_HIGHMEM); + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state) + return state; + state->state = 0; + RB_CLEAR_NODE(&state->rb_node); + btrfs_leak_debug_add_state(state); + refcount_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + trace_btrfs_alloc_extent_state(state, mask, _RET_IP_); + return state; +} + +static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc) +{ + if (!prealloc) + prealloc = alloc_extent_state(GFP_ATOMIC); + + return prealloc; +} + +void btrfs_free_extent_state(struct extent_state *state) +{ + if (!state) + return; + if (refcount_dec_and_test(&state->refs)) { + WARN_ON(extent_state_in_tree(state)); + btrfs_leak_debug_del_state(state); + trace_btrfs_free_extent_state(state, _RET_IP_); + kmem_cache_free(extent_state_cache, state); + } +} + +static int add_extent_changeset(struct extent_state *state, u32 bits, + struct extent_changeset *changeset, + int set) +{ + int ret; + + if (!changeset) + return 0; + if (set && (state->state & bits) == bits) + return 0; + if (!set && (state->state & bits) == 0) + return 0; + changeset->bytes_changed += state->end - state->start + 1; + ret = ulist_add(&changeset->range_changed, state->start, state->end, + GFP_ATOMIC); + return ret; +} + +static inline struct extent_state *next_state(struct extent_state *state) +{ + struct rb_node *next = rb_next(&state->rb_node); + + return rb_entry_safe(next, struct extent_state, rb_node); +} + +static inline struct extent_state *prev_state(struct extent_state *state) +{ + struct rb_node *next = rb_prev(&state->rb_node); + + return rb_entry_safe(next, struct extent_state, rb_node); +} + +/* + * Search @tree for an entry that contains @offset, or, if none exists, for + * the first entry that starts and ends after that offset. + * + * @tree: the tree to search + * @offset: search offset + * @node_ret: pointer where new node should be anchored (used when inserting an + * entry in the tree) + * @parent_ret: points to entry which would have been the parent of the entry + * containing @offset + * + * Return a pointer to the entry that contains @offset byte address.
+ * + * If no such entry exists, return the first entry that starts and ends after + * @offset if one exists, otherwise NULL. + * + * If the returned entry starts at @offset, then @node_ret and @parent_ret + * aren't changed. + */ +static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree, + u64 offset, + struct rb_node ***node_ret, + struct rb_node **parent_ret) +{ + struct rb_root *root = &tree->state; + struct rb_node **node = &root->rb_node; + struct rb_node *prev = NULL; + struct extent_state *entry = NULL; + + while (*node) { + prev = *node; + entry = rb_entry(prev, struct extent_state, rb_node); + + if (offset < entry->start) + node = &(*node)->rb_left; + else if (offset > entry->end) + node = &(*node)->rb_right; + else + return entry; + } + + if (node_ret) + *node_ret = node; + if (parent_ret) + *parent_ret = prev; + + /* + * Return either the current entry if it contains offset (it ends after + * or at offset) or the first entry that starts and ends after offset if + * one exists, or NULL. + */ + while (entry && offset > entry->end) + entry = next_state(entry); + + return entry; +} + +/* + * Search offset in the tree or fill neighbor rbtree node pointers. + * + * @tree: the tree to search + * @offset: offset that should fall within an entry in @tree + * @next_ret: pointer to the first entry whose range ends after @offset + * @prev_ret: pointer to the first entry whose range begins before @offset + * + * Return a pointer to the entry that contains @offset byte address. If no + * such entry exists, then return NULL and fill @prev_ret and @next_ret. + * Otherwise return the found entry and other pointers are left untouched. + */ +static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree, + u64 offset, + struct extent_state **prev_ret, + struct extent_state **next_ret) +{ + struct rb_root *root = &tree->state; + struct rb_node **node = &root->rb_node; + struct extent_state *orig_prev; + struct extent_state *entry = NULL; + + ASSERT(prev_ret); + ASSERT(next_ret); + + while (*node) { + entry = rb_entry(*node, struct extent_state, rb_node); + + if (offset < entry->start) + node = &(*node)->rb_left; + else if (offset > entry->end) + node = &(*node)->rb_right; + else + return entry; + } + + orig_prev = entry; + while (entry && offset > entry->end) + entry = next_state(entry); + *next_ret = entry; + entry = orig_prev; + + while (entry && offset < entry->start) + entry = prev_state(entry); + *prev_ret = entry; + + return NULL; +} + +/* + * Inexact rb-tree search, return the next entry if @offset is not found + */ +static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset) +{ + return tree_search_for_insert(tree, offset, NULL, NULL); +} + +static void __cold extent_io_tree_panic(const struct extent_io_tree *tree, + const struct extent_state *state, + const char *opname, + int err) +{ + btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err, + "extent io tree error on %s state start %llu end %llu", + opname, state->start, state->end); +} + +static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state) +{ + struct extent_state *prev; + + prev = prev_state(state); + if (prev && prev->end == state->start - 1 && prev->state == state->state) { + if (tree->owner == IO_TREE_INODE_IO) + btrfs_merge_delalloc_extent(tree->inode, state, prev); + state->start = prev->start; + rb_erase(&prev->rb_node, &tree->state); + RB_CLEAR_NODE(&prev->rb_node); + btrfs_free_extent_state(prev); + } +} + +static void 
merge_next_state(struct extent_io_tree *tree, struct extent_state *state) +{ + struct extent_state *next; + + next = next_state(state); + if (next && next->start == state->end + 1 && next->state == state->state) { + if (tree->owner == IO_TREE_INODE_IO) + btrfs_merge_delalloc_extent(tree->inode, state, next); + state->end = next->end; + rb_erase(&next->rb_node, &tree->state); + RB_CLEAR_NODE(&next->rb_node); + btrfs_free_extent_state(next); + } +} + +/* + * Utility function to look for merge candidates inside a given range. Any + * extents with matching state are merged together into a single extent in the + * tree. Extents with EXTENT_LOCK_BITS or EXTENT_BOUNDARY in their state field + * are not merged because the end_io handlers need to be able to do operations + * on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static void merge_state(struct extent_io_tree *tree, struct extent_state *state) +{ + if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) + return; + + merge_prev_state(tree, state); + merge_next_state(tree, state); +} + +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, + u32 bits, struct extent_changeset *changeset) +{ + u32 bits_to_set = bits & ~EXTENT_CTLBITS; + int ret; + + if (tree->owner == IO_TREE_INODE_IO) + btrfs_set_delalloc_extent(tree->inode, state, bits); + + ret = add_extent_changeset(state, bits_to_set, changeset, 1); + BUG_ON(ret < 0); + state->state |= bits_to_set; +} + +/* + * Insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * Returns a pointer to the struct extent_state record containing the range + * requested for insertion, which may be the same as the given struct or it + * may be an existing record in the tree that was expanded to accommodate the + * requested range. In case of an extent_state different from the one that was + * given, the latter can be freed or reused by the caller. + * + * On error it returns an error pointer. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit).
+ */ +static struct extent_state *insert_state(struct extent_io_tree *tree, + struct extent_state *state, + u32 bits, + struct extent_changeset *changeset) +{ + struct rb_node **node; + struct rb_node *parent = NULL; + const u64 start = state->start - 1; + const u64 end = state->end + 1; + const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); + + set_state_bits(tree, state, bits, changeset); + + node = &tree->state.rb_node; + while (*node) { + struct extent_state *entry; + + parent = *node; + entry = rb_entry(parent, struct extent_state, rb_node); + + if (state->end < entry->start) { + if (try_merge && end == entry->start && + state->state == entry->state) { + if (tree->owner == IO_TREE_INODE_IO) + btrfs_merge_delalloc_extent(tree->inode, + state, entry); + entry->start = state->start; + merge_prev_state(tree, entry); + state->state = 0; + return entry; + } + node = &(*node)->rb_left; + } else if (state->end > entry->end) { + if (try_merge && entry->end == start && + state->state == entry->state) { + if (tree->owner == IO_TREE_INODE_IO) + btrfs_merge_delalloc_extent(tree->inode, + state, entry); + entry->end = state->end; + merge_next_state(tree, entry); + state->state = 0; + return entry; + } + node = &(*node)->rb_right; + } else { + return ERR_PTR(-EEXIST); + } + } + + rb_link_node(&state->rb_node, parent, node); + rb_insert_color(&state->rb_node, &tree->state); + + return state; +} + +/* + * Insert state to @tree to the location given by @node and @parent. + */ +static void insert_state_fast(struct extent_io_tree *tree, + struct extent_state *state, struct rb_node **node, + struct rb_node *parent, unsigned bits, + struct extent_changeset *changeset) +{ + set_state_bits(tree, state, bits, changeset); + rb_link_node(&state->rb_node, parent, node); + rb_insert_color(&state->rb_node, &tree->state); + merge_state(tree, state); +} + +/* + * Split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. + */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *parent = NULL; + struct rb_node **node; + + if (tree->owner == IO_TREE_INODE_IO) + btrfs_split_delalloc_extent(tree->inode, orig, split); + + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + + parent = &orig->rb_node; + node = &parent; + while (*node) { + struct extent_state *entry; + + parent = *node; + entry = rb_entry(parent, struct extent_state, rb_node); + + if (prealloc->end < entry->start) { + node = &(*node)->rb_left; + } else if (prealloc->end > entry->end) { + node = &(*node)->rb_right; + } else { + btrfs_free_extent_state(prealloc); + return -EEXIST; + } + } + + rb_link_node(&prealloc->rb_node, parent, node); + rb_insert_color(&prealloc->rb_node, &tree->state); + + return 0; +} + +/* + * Use this during tree iteration to avoid doing next node searches when it's + * not needed (the current record ends at or after the target range's end). 
+ */ +static inline struct extent_state *next_search_state(struct extent_state *state, u64 end) +{ + if (state->end < end) + return next_state(state); + + return NULL; +} + +/* + * Utility function to clear some bits in an extent state struct. It will + * optionally wake up anyone waiting on this state (wake == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static struct extent_state *clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, + u32 bits, int wake, u64 end, + struct extent_changeset *changeset) +{ + struct extent_state *next; + u32 bits_to_clear = bits & ~EXTENT_CTLBITS; + int ret; + + if (tree->owner == IO_TREE_INODE_IO) + btrfs_clear_delalloc_extent(tree->inode, state, bits); + + ret = add_extent_changeset(state, bits_to_clear, changeset, 0); + BUG_ON(ret < 0); + state->state &= ~bits_to_clear; + if (wake) + wake_up(&state->wq); + if (state->state == 0) { + next = next_search_state(state, end); + if (extent_state_in_tree(state)) { + rb_erase(&state->rb_node, &tree->state); + RB_CLEAR_NODE(&state->rb_node); + btrfs_free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + next = next_search_state(state, end); + } + return next; +} + +/* + * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly, + * unset the EXTENT_NOWAIT bit. + */ +static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask) +{ + *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS); + *bits &= EXTENT_NOWAIT - 1; +} + +/* + * Clear some bits on a range in the tree. This may require splitting or + * inserting elements in the tree, so the gfp mask is used to indicate which + * allocations or sleeping are allowed. + * + * The range [start, end] is inclusive. + * + * This takes the tree lock, and returns 0 on success and < 0 on error. + */ +int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, struct extent_state **cached_state, + struct extent_changeset *changeset) +{ + struct extent_state *state; + struct extent_state *cached; + struct extent_state *prealloc = NULL; + u64 last_end; + int ret = 0; + bool clear; + bool wake; + const bool delete = (bits & EXTENT_CLEAR_ALL_BITS); + gfp_t mask; + + set_gfp_mask_from_bits(&bits, &mask); + btrfs_debug_check_extent_io_range(tree, start, end); + trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); + + if (delete) + bits |= ~EXTENT_CTLBITS; + + if (bits & EXTENT_DELALLOC) + bits |= EXTENT_NORESERVE; + + wake = (bits & EXTENT_LOCK_BITS); + clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); +again: + if (!prealloc) { + /* + * Don't care for allocation failure here because we might end + * up not needing the pre-allocated extent state at all, which + * is the case if we only have in the tree extent states that + * cover our input range and don't cover too any other range. + * If we end up needing a new extent state we allocate it later. + */ + prealloc = alloc_extent_state(mask); + } + + spin_lock(&tree->lock); + if (cached_state) { + cached = *cached_state; + + if (clear) { + *cached_state = NULL; + cached_state = NULL; + } + + if (cached && extent_state_in_tree(cached) && + cached->start <= start && cached->end > start) { + if (clear) + refcount_dec(&cached->refs); + state = cached; + goto hit_next; + } + if (clear) + btrfs_free_extent_state(cached); + } + + /* This search will find the extents that end after our range starts. 
*/ + state = tree_search(tree, start); + if (!state) + goto out; +hit_next: + if (state->start > end) + goto out; + WARN_ON(state->end < start); + last_end = state->end; + + /* The state doesn't have the wanted bits, go ahead. */ + if (!(state->state & bits)) { + state = next_search_state(state, end); + goto next; + } + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on second + * half. + * + * If the extent we found extends past our range, we just split and + * search again. It'll get split again the next time though. + * + * If the extent we found is inside our range, we clear the desired bit + * on it. + */ + + if (state->start < start) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + goto search_again; + ret = split_state(tree, state, prealloc, start); + prealloc = NULL; + if (ret) { + extent_io_tree_panic(tree, state, "split", ret); + goto out; + } + if (state->end <= end) { + state = clear_state_bit(tree, state, bits, wake, end, + changeset); + goto next; + } + if (need_resched()) + goto search_again; + /* + * Fallthrough and try atomic extent state allocation if needed. + * If it fails we'll jump to 'search_again' retry the allocation + * in non-atomic mode and start the search again. + */ + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit on the first half. + */ + if (state->start <= end && state->end > end) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + goto search_again; + ret = split_state(tree, state, prealloc, end + 1); + if (ret) { + extent_io_tree_panic(tree, state, "split", ret); + prealloc = NULL; + goto out; + } + + if (wake) + wake_up(&state->wq); + + clear_state_bit(tree, prealloc, bits, wake, end, changeset); + + prealloc = NULL; + goto out; + } + + state = clear_state_bit(tree, state, bits, wake, end, changeset); +next: + if (last_end >= end) + goto out; + start = last_end + 1; + if (state && !need_resched()) + goto hit_next; + +search_again: + spin_unlock(&tree->lock); + if (gfpflags_allow_blocking(mask)) + cond_resched(); + goto again; + +out: + spin_unlock(&tree->lock); + btrfs_free_extent_state(prealloc); + + return ret; + +} + +/* + * Wait for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. + * The tree lock is taken by this function + */ +static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, struct extent_state **cached_state) +{ + struct extent_state *state; + + btrfs_debug_check_extent_io_range(tree, start, end); + + spin_lock(&tree->lock); +again: + /* + * Maintain cached_state, as we may not remove it from the tree if there + * are more bits than the bits we're waiting on set on this state. + */ + if (cached_state && *cached_state) { + state = *cached_state; + if (extent_state_in_tree(state) && + state->start <= start && start < state->end) + goto process_node; + } + while (1) { + /* + * This search will find all the extents that end after our + * range starts. 
+ */ + state = tree_search(tree, start); +process_node: + if (!state) + break; + if (state->start > end) + goto out; + + if (state->state & bits) { + DEFINE_WAIT(wait); + + start = state->start; + refcount_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&tree->lock); + schedule(); + spin_lock(&tree->lock); + finish_wait(&state->wq, &wait); + btrfs_free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (!cond_resched_lock(&tree->lock)) { + state = next_state(state); + goto process_node; + } + } +out: + /* This state is no longer useful; clear it and free it up. */ + if (cached_state && *cached_state) { + state = *cached_state; + *cached_state = NULL; + btrfs_free_extent_state(state); + } + spin_unlock(&tree->lock); +} + +static void cache_state_if_flags(struct extent_state *state, + struct extent_state **cached_ptr, + unsigned flags) +{ + if (cached_ptr && !(*cached_ptr)) { + if (!flags || (state->state & flags)) { + *cached_ptr = state; + refcount_inc(&state->refs); + } + } +} + +static void cache_state(struct extent_state *state, + struct extent_state **cached_ptr) +{ + return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY); +} + +/* + * Find the first state struct with 'bits' set after 'start', and return it. + * tree->lock must be held. NULL will be returned if nothing was found after + * 'start'. + */ +static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, + u64 start, u32 bits) +{ + struct extent_state *state; + + /* + * This search will find all the extents that end after our range + * starts. + */ + state = tree_search(tree, start); + while (state) { + if (state->state & bits) + return state; + state = next_state(state); + } + return NULL; +} + +/* + * Find the first offset in the io tree with one or more @bits set. + * + * Note: If there are multiple bits set in @bits, any of them will match. + * + * Return true if we find something, and update @start_ret and @end_ret. + * Return false if we found nothing. + */ +bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, u32 bits, + struct extent_state **cached_state) +{ + struct extent_state *state; + bool ret = false; + + spin_lock(&tree->lock); + if (cached_state && *cached_state) { + state = *cached_state; + if (state->end == start - 1 && extent_state_in_tree(state)) { + while ((state = next_state(state)) != NULL) { + if (state->state & bits) + break; + } + /* + * If we found the next extent state, clear cached_state + * so that we can cache the next extent state below and + * avoid future calls going over the same extent state + * again. If we haven't found any, clear as well since + * it's now useless.
+ */ + btrfs_free_extent_state(*cached_state); + *cached_state = NULL; + if (state) + goto got_it; + goto out; + } + btrfs_free_extent_state(*cached_state); + *cached_state = NULL; + } + + state = find_first_extent_bit_state(tree, start, bits); +got_it: + if (state) { + cache_state_if_flags(state, cached_state, 0); + *start_ret = state->start; + *end_ret = state->end; + ret = true; + } +out: + spin_unlock(&tree->lock); + return ret; +} + +/* + * Find a contiguous area of bits + * + * @tree: io tree to check + * @start: offset to start the search from + * @start_ret: the first offset we found with the bits set + * @end_ret: the final contiguous range of the bits that were set + * @bits: bits to look for + * + * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges + * to set bits appropriately, and then merge them again. During this time it + * will drop the tree->lock, so use this helper if you want to find the actual + * contiguous area for given bits. We will search to the first bit we find, and + * then walk down the tree until we find a non-contiguous area. The area + * returned will be the full contiguous area with the bits set. + * + * Returns true if we found a range with the given bits set, in which case + * @start_ret and @end_ret are updated, or false if no range was found. + */ +bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, u32 bits) +{ + struct extent_state *state; + bool ret = false; + + ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES)); + + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, bits); + if (state) { + *start_ret = state->start; + *end_ret = state->end; + while ((state = next_state(state)) != NULL) { + if (state->start > (*end_ret + 1)) + break; + *end_ret = state->end; + } + ret = true; + } + spin_unlock(&tree->lock); + return ret; +} + +/* + * Find a contiguous range of bytes in the file marked as delalloc, not more + * than 'max_bytes'. start and end are used to return the range, + * + * True is returned if we find something, false if nothing was in the tree. + */ +bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, + u64 *end, u64 max_bytes, + struct extent_state **cached_state) +{ + struct extent_state *state; + u64 cur_start = *start; + bool found = false; + u64 total_bytes = 0; + + spin_lock(&tree->lock); + + /* + * This search will find all the extents that end after our range + * starts. + */ + state = tree_search(tree, cur_start); + if (!state) { + *end = (u64)-1; + goto out; + } + + while (state) { + if (found && (state->start != cur_start || + (state->state & EXTENT_BOUNDARY))) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; + goto out; + } + if (!found) { + *start = state->start; + *cached_state = state; + refcount_inc(&state->refs); + } + found = true; + *end = state->end; + cur_start = state->end + 1; + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + state = next_state(state); + } +out: + spin_unlock(&tree->lock); + return found; +} + +/* + * Set some bits on a range in the tree. This may require allocations or + * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for + * GFP_NOWAIT. + * + * If any of the exclusive bits are set, this will fail with -EEXIST if some + * part of the range already has the desired bits set. 
The extent_state of the + * existing range is returned in failed_state in this case, and the start of the + * existing range is returned in failed_start. failed_state is used as an + * optimization for wait_extent_bit, failed_start must be used as the source of + * truth as failed_state may have changed since we returned. + * + * [start, end] is inclusive This takes the tree lock. + */ +static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, u64 *failed_start, + struct extent_state **failed_state, + struct extent_state **cached_state, + struct extent_changeset *changeset) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node **p = NULL; + struct rb_node *parent = NULL; + int ret = 0; + u64 last_start; + u64 last_end; + u32 exclusive_bits = (bits & EXTENT_LOCK_BITS); + gfp_t mask; + + set_gfp_mask_from_bits(&bits, &mask); + btrfs_debug_check_extent_io_range(tree, start, end); + trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); + + if (exclusive_bits) + ASSERT(failed_start); + else + ASSERT(failed_start == NULL && failed_state == NULL); +again: + if (!prealloc) { + /* + * Don't care for allocation failure here because we might end + * up not needing the pre-allocated extent state at all, which + * is the case if we only have in the tree extent states that + * cover our input range and don't cover too any other range. + * If we end up needing a new extent state we allocate it later. + */ + prealloc = alloc_extent_state(mask); + } + /* Optimistically preallocate the extent changeset ulist node. */ + if (changeset) + extent_changeset_prealloc(changeset, mask); + + spin_lock(&tree->lock); + if (cached_state && *cached_state) { + state = *cached_state; + if (state->start <= start && state->end > start && + extent_state_in_tree(state)) + goto hit_next; + } + /* + * This search will find all the extents that end after our range + * starts. + */ + state = tree_search_for_insert(tree, start, &p, &parent); + if (!state) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + goto search_again; + prealloc->start = start; + prealloc->end = end; + insert_state_fast(tree, prealloc, p, parent, bits, changeset); + cache_state(prealloc, cached_state); + prealloc = NULL; + goto out; + } +hit_next: + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + if (state->state & exclusive_bits) { + *failed_start = state->start; + cache_state(state, failed_state); + ret = -EEXIST; + goto out; + } + + set_state_bits(tree, state, bits, changeset); + cache_state(state, cached_state); + merge_state(tree, state); + if (last_end >= end) + goto out; + start = last_end + 1; + state = next_state(state); + if (state && state->start == start && !need_resched()) + goto hit_next; + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on second + * half. + * + * If the extent we found extends past our range, we just split and + * search again. It'll get split again the next time though. + * + * If the extent we found is inside our range, we set the desired bit + * on it. 
+ */ + if (state->start < start) { + if (state->state & exclusive_bits) { + *failed_start = start; + cache_state(state, failed_state); + ret = -EEXIST; + goto out; + } + + /* + * If this extent already has all the bits we want set, then + * skip it, not necessary to split it or do anything with it. + */ + if ((state->state & bits) == bits) { + start = state->end + 1; + cache_state(state, cached_state); + goto search_again; + } + + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + goto search_again; + ret = split_state(tree, state, prealloc, start); + if (ret) + extent_io_tree_panic(tree, state, "split", ret); + + prealloc = NULL; + if (ret) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, bits, changeset); + cache_state(state, cached_state); + merge_state(tree, state); + if (last_end >= end) + goto out; + start = last_end + 1; + state = next_state(state); + if (state && state->start == start && !need_resched()) + goto hit_next; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and ignore the + * extent we found. + */ + if (state->start > start) { + struct extent_state *inserted_state; + + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + goto search_again; + + /* + * Avoid to free 'prealloc' if it can be merged with the later + * extent. + */ + prealloc->start = start; + if (end < last_start) + prealloc->end = end; + else + prealloc->end = last_start - 1; + + inserted_state = insert_state(tree, prealloc, bits, changeset); + if (IS_ERR(inserted_state)) { + ret = PTR_ERR(inserted_state); + extent_io_tree_panic(tree, prealloc, "insert", ret); + goto out; + } + + cache_state(inserted_state, cached_state); + if (inserted_state == prealloc) + prealloc = NULL; + start = inserted_state->end + 1; + + /* Beyond target range, stop. */ + if (start > end) + goto out; + + if (need_resched()) + goto search_again; + + state = next_search_state(inserted_state, end); + /* + * If there's a next state, whether contiguous or not, we don't + * need to unlock and start search again. If it's not contiguous + * we will end up here and try to allocate a prealloc state and insert. 
+ */ + if (state) + goto hit_next; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * + * We need to split the extent, and set the bit on the first half + */ + if (state->start <= end && state->end > end) { + if (state->state & exclusive_bits) { + *failed_start = start; + cache_state(state, failed_state); + ret = -EEXIST; + goto out; + } + + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + goto search_again; + ret = split_state(tree, state, prealloc, end + 1); + if (ret) { + extent_io_tree_panic(tree, state, "split", ret); + prealloc = NULL; + goto out; + } + + set_state_bits(tree, prealloc, bits, changeset); + cache_state(prealloc, cached_state); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + +search_again: + if (start > end) + goto out; + spin_unlock(&tree->lock); + if (gfpflags_allow_blocking(mask)) + cond_resched(); + goto again; + +out: + spin_unlock(&tree->lock); + btrfs_free_extent_state(prealloc); + + return ret; + +} + +int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, struct extent_state **cached_state) +{ + return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL); +} + +/* + * Convert all bits in a given range from one bit to another + * + * @tree: the io tree to search + * @start: the start offset in bytes + * @end: the end offset in bytes (inclusive) + * @bits: the bits to set in this range + * @clear_bits: the bits to clear in this range + * @cached_state: state that we're going to cache + * + * This will go through and set bits for the given range. If any states exist + * already in this range they are set with the given bit and cleared of the + * clear_bits. This is only meant to be used by things that are mergeable, ie. + * converting from say DELALLOC to DIRTY. This is not meant to be used with + * boundary bits like LOCK. + * + * All allocations are done with GFP_NOFS. + */ +int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, u32 clear_bits, + struct extent_state **cached_state) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node **p = NULL; + struct rb_node *parent = NULL; + int ret = 0; + u64 last_start; + u64 last_end; + bool first_iteration = true; + + btrfs_debug_check_extent_io_range(tree, start, end); + trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, + clear_bits); + +again: + if (!prealloc) { + /* + * Best effort, don't worry if extent state allocation fails + * here for the first iteration. We might have a cached state + * that matches exactly the target range, in which case no + * extent state allocations are needed. We'll only know this + * after locking the tree. + */ + prealloc = alloc_extent_state(GFP_NOFS); + if (!prealloc && !first_iteration) + return -ENOMEM; + } + + spin_lock(&tree->lock); + if (cached_state && *cached_state) { + state = *cached_state; + if (state->start <= start && state->end > start && + extent_state_in_tree(state)) + goto hit_next; + } + + /* + * This search will find all the extents that end after our range + * starts. 
+ */ + state = tree_search_for_insert(tree, start, &p, &parent); + if (!state) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + ret = -ENOMEM; + goto out; + } + prealloc->start = start; + prealloc->end = end; + insert_state_fast(tree, prealloc, p, parent, bits, NULL); + cache_state(prealloc, cached_state); + prealloc = NULL; + goto out; + } +hit_next: + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going. + */ + if (state->start == start && state->end <= end) { + set_state_bits(tree, state, bits, NULL); + cache_state(state, cached_state); + state = clear_state_bit(tree, state, clear_bits, 0, end, NULL); + if (last_end >= end) + goto out; + start = last_end + 1; + if (state && state->start == start && !need_resched()) + goto hit_next; + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on second + * half. + * + * If the extent we found extends past our range, we just split and + * search again. It'll get split again the next time though. + * + * If the extent we found is inside our range, we set the desired bit + * on it. + */ + if (state->start < start) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + ret = -ENOMEM; + goto out; + } + ret = split_state(tree, state, prealloc, start); + prealloc = NULL; + if (ret) { + extent_io_tree_panic(tree, state, "split", ret); + goto out; + } + if (state->end <= end) { + set_state_bits(tree, state, bits, NULL); + cache_state(state, cached_state); + state = clear_state_bit(tree, state, clear_bits, 0, end, NULL); + if (last_end >= end) + goto out; + start = last_end + 1; + if (state && state->start == start && !need_resched()) + goto hit_next; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and ignore the + * extent we found. + */ + if (state->start > start) { + struct extent_state *inserted_state; + + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + ret = -ENOMEM; + goto out; + } + + /* + * Avoid to free 'prealloc' if it can be merged with the later + * extent. + */ + prealloc->start = start; + if (end < last_start) + prealloc->end = end; + else + prealloc->end = last_start - 1; + + inserted_state = insert_state(tree, prealloc, bits, NULL); + if (IS_ERR(inserted_state)) { + ret = PTR_ERR(inserted_state); + extent_io_tree_panic(tree, prealloc, "insert", ret); + goto out; + } + cache_state(inserted_state, cached_state); + if (inserted_state == prealloc) + prealloc = NULL; + start = inserted_state->end + 1; + + /* Beyond target range, stop. */ + if (start > end) + goto out; + + if (need_resched()) + goto search_again; + + state = next_search_state(inserted_state, end); + /* + * If there's a next state, whether contiguous or not, we don't + * need to unlock and start search again. If it's not contiguous + * we will end up here and try to allocate a prealloc state and insert. + */ + if (state) + goto hit_next; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * + * We need to split the extent, and set the bit on the first half. 
+ */ + if (state->start <= end && state->end > end) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + ret = -ENOMEM; + goto out; + } + + ret = split_state(tree, state, prealloc, end + 1); + if (ret) { + extent_io_tree_panic(tree, state, "split", ret); + prealloc = NULL; + goto out; + } + + set_state_bits(tree, prealloc, bits, NULL); + cache_state(prealloc, cached_state); + clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL); + prealloc = NULL; + goto out; + } + +search_again: + if (start > end) + goto out; + spin_unlock(&tree->lock); + cond_resched(); + first_iteration = false; + goto again; + +out: + spin_unlock(&tree->lock); + btrfs_free_extent_state(prealloc); + + return ret; +} + +/* + * Find the first range that has @bits not set. This range could start before + * @start. + * + * @tree: the tree to search + * @start: offset at/after which the found extent should start + * @start_ret: records the beginning of the range + * @end_ret: records the end of the range (inclusive) + * @bits: the set of bits which must be unset + * + * Since unallocated range is also considered one which doesn't have the bits + * set it's possible that @end_ret contains -1, this happens in case the range + * spans (last_range_end, end of device]. In this case it's up to the caller to + * trim @end_ret to the appropriate size. + */ +void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, u32 bits) +{ + struct extent_state *state; + struct extent_state *prev = NULL, *next = NULL; + + spin_lock(&tree->lock); + + /* Find first extent with bits cleared */ + while (1) { + state = tree_search_prev_next(tree, start, &prev, &next); + if (!state && !next && !prev) { + /* + * Tree is completely empty, send full range and let + * caller deal with it + */ + *start_ret = 0; + *end_ret = -1; + goto out; + } else if (!state && !next) { + /* + * We are past the last allocated chunk, set start at + * the end of the last extent. + */ + *start_ret = prev->end + 1; + *end_ret = -1; + goto out; + } else if (!state) { + state = next; + } + + /* + * At this point 'state' either contains 'start' or start is + * before 'state' + */ + if (in_range(start, state->start, state->end - state->start + 1)) { + if (state->state & bits) { + /* + * |--range with bits sets--| + * | + * start + */ + start = state->end + 1; + } else { + /* + * 'start' falls within a range that doesn't + * have the bits set, so take its start as the + * beginning of the desired range + * + * |--range with bits cleared----| + * | + * start + */ + *start_ret = state->start; + break; + } + } else { + /* + * |---prev range---|---hole/unset---|---node range---| + * | + * start + * + * or + * + * |---hole/unset--||--first node--| + * 0 | + * start + */ + if (prev) + *start_ret = prev->end + 1; + else + *start_ret = 0; + break; + } + } + + /* + * Find the longest stretch from start until an entry which has the + * bits set + */ + while (state) { + if (state->end >= start && !(state->state & bits)) { + *end_ret = state->end; + } else { + *end_ret = state->start - 1; + break; + } + state = next_state(state); + } +out: + spin_unlock(&tree->lock); +} + +/* + * Count the number of bytes in the tree that have a given bit(s) set for a + * given range. + * + * @tree: The io tree to search. + * @start: The start offset of the range. This value is updated to the + * offset of the first byte found with the given bit(s), so it + * can end up being bigger than the initial value. 
+ * @search_end: The end offset (inclusive value) of the search range. + * @max_bytes: The maximum byte count we are interested in. The search stops + * once it reaches this count. + * @bits: The bits the range must have in order to be accounted for. + * If multiple bits are set, then only subranges that have all + * the bits set are accounted for. + * @contig: Indicate if we should ignore holes in the range or not. If + * this is true, then stop once we find a hole. + * @cached_state: A cached state to be used across multiple calls to this + * function in order to speed up searches. Use NULL if this is + * called only once or if each call does not start where the + * previous one ended. + * + * Returns the total number of bytes found within the given range that have + * all given bits set. If the returned number of bytes is greater than zero + * then @start is updated with the offset of the first byte with the bits set. + */ +u64 btrfs_count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, u64 max_bytes, + u32 bits, bool contig, + struct extent_state **cached_state) +{ + struct extent_state *state = NULL; + struct extent_state *cached; + u64 cur_start = *start; + u64 total_bytes = 0; + u64 last = 0; + int found = 0; + + if (WARN_ON(search_end < cur_start)) + return 0; + + spin_lock(&tree->lock); + + if (!cached_state || !*cached_state) + goto search; + + cached = *cached_state; + + if (!extent_state_in_tree(cached)) + goto search; + + if (cached->start <= cur_start && cur_start <= cached->end) { + state = cached; + } else if (cached->start > cur_start) { + struct extent_state *prev; + + /* + * The cached state starts after our search range's start. Check + * if the previous state record starts at or before the range we + * are looking for, and if so, use it - this is a common case + * when there are holes between records in the tree. If there is + * no previous state record, we can start from our cached state. + */ + prev = prev_state(cached); + if (!prev) + state = cached; + else if (prev->start <= cur_start && cur_start <= prev->end) + state = prev; + } + + /* + * This search will find all the extents that end after our range + * starts. + */ +search: + if (!state) + state = tree_search(tree, cur_start); + + while (state) { + if (state->start > search_end) + break; + if (contig && found && state->start > last + 1) + break; + if (state->end >= cur_start && (state->state & bits) == bits) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = max(cur_start, state->start); + found = 1; + } + last = state->end; + } else if (contig && found) { + break; + } + state = next_state(state); + } + + if (cached_state) { + btrfs_free_extent_state(*cached_state); + *cached_state = state; + if (state) + refcount_inc(&state->refs); + } + + spin_unlock(&tree->lock); + + return total_bytes; +} + +/* + * Check if the single @bit exists in the given range.
+ */ +bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit) +{ + struct extent_state *state; + bool bitset = false; + + ASSERT(is_power_of_2(bit)); + + spin_lock(&tree->lock); + state = tree_search(tree, start); + while (state) { + if (state->start > end) + break; + + if (state->state & bit) { + bitset = true; + break; + } + + if (state->end >= end) + break; + state = next_state(state); + } + spin_unlock(&tree->lock); + return bitset; +} + +void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits, + struct extent_state **cached_state) +{ + struct extent_state *state; + + /* + * The cached state is currently mandatory and not used to start the + * search, only to cache the first state record found in the range. + */ + ASSERT(cached_state != NULL); + ASSERT(*cached_state == NULL); + + *bits = 0; + + spin_lock(&tree->lock); + state = tree_search(tree, start); + if (state && state->start < end) { + *cached_state = state; + refcount_inc(&state->refs); + } + while (state) { + if (state->start > end) + break; + + *bits |= state->state; + + if (state->end >= end) + break; + + state = next_state(state); + } + spin_unlock(&tree->lock); +} + +/* + * Check if the whole range [@start,@end) contains the single @bit set. + */ +bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit, + struct extent_state *cached) +{ + struct extent_state *state; + bool bitset = true; + + ASSERT(is_power_of_2(bit)); + ASSERT(start < end); + + spin_lock(&tree->lock); + if (cached && extent_state_in_tree(cached) && cached->start <= start && + cached->end > start) + state = cached; + else + state = tree_search(tree, start); + while (state) { + if (state->start > start) { + bitset = false; + break; + } + + if ((state->state & bit) == 0) { + bitset = false; + break; + } + + if (state->end >= end) + break; + + /* Next state must start where this one ends. */ + start = state->end + 1; + state = next_state(state); + } + + /* We ran out of states and were still inside of our range. */ + if (!state) + bitset = false; + spin_unlock(&tree->lock); + return bitset; +} + +/* Wrappers around set/clear extent bit */ +int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, struct extent_changeset *changeset) +{ + /* + * We don't support EXTENT_LOCK_BITS yet, as current changeset will + * record any bits changed, so for EXTENT_LOCK_BITS case, it will either + * fail with -EEXIST or changeset will record the whole range. + */ + ASSERT(!(bits & EXTENT_LOCK_BITS)); + + return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset); +} + +int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, struct extent_changeset *changeset) +{ + /* + * Don't support EXTENT_LOCK_BITS case, same reason as + * set_record_extent_bits(). + */ + ASSERT(!(bits & EXTENT_LOCK_BITS)); + + return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset); +} + +bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + u32 bits, struct extent_state **cached) +{ + int ret; + u64 failed_start; + + ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL); + if (ret == -EEXIST) { + if (failed_start > start) + btrfs_clear_extent_bit(tree, start, failed_start - 1, + bits, cached); + return 0; + } + return 1; +} + +/* + * Either insert or lock state struct between start and end use mask to tell + * us if waiting is desired. 
+ */ +int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, + struct extent_state **cached_state) +{ + struct extent_state *failed_state = NULL; + int ret; + u64 failed_start; + + ret = set_extent_bit(tree, start, end, bits, &failed_start, + &failed_state, cached_state, NULL); + while (ret == -EEXIST) { + if (failed_start != start) + btrfs_clear_extent_bit(tree, start, failed_start - 1, + bits, cached_state); + + wait_extent_bit(tree, failed_start, end, bits, &failed_state); + ret = set_extent_bit(tree, start, end, bits, &failed_start, + &failed_state, cached_state, NULL); + } + return ret; +} + +/* + * Get the extent state that follows the given extent state. + * This is meant to be used in a context where we know no other tasks can + * concurrently modify the tree. + */ +struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *next; + + spin_lock(&tree->lock); + ASSERT(extent_state_in_tree(state)); + next = next_state(state); + if (next) + refcount_inc(&next->refs); + spin_unlock(&tree->lock); + + return next; +} + +void __cold btrfs_extent_state_free_cachep(void) +{ + btrfs_extent_state_leak_debug_check(); + kmem_cache_destroy(extent_state_cache); +} + +int __init btrfs_extent_state_init_cachep(void) +{ + extent_state_cache = kmem_cache_create("btrfs_extent_state", + sizeof(struct extent_state), 0, 0, + NULL); + if (!extent_state_cache) + return -ENOMEM; + + return 0; +}
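A sketch of the lock/unlock pairing built on btrfs_lock_extent_bits() above; illustrative only, not part of the patch. demo_locked_section() is a hypothetical name, EXTENT_LOCKED is assumed to be one of the EXTENT_LOCK_BITS from extent-io-tree.h, and unlocking is written directly as a clear of the lock bit rather than through whatever wrapper the header provides:

	static void demo_locked_section(struct extent_io_tree *tree, u64 start, u64 end)
	{
		struct extent_state *cached = NULL;

		/*
		 * Sleeps in wait_extent_bit() until no part of [start, end]
		 * carries EXTENT_LOCKED, then sets it on the whole range.
		 */
		btrfs_lock_extent_bits(tree, start, end, EXTENT_LOCKED, &cached);

		/* ... work on the range with exclusive ownership ... */

		/* Clearing a lock bit wakes any tasks waiting on the range. */
		btrfs_clear_extent_bit(tree, start, end, EXTENT_LOCKED, &cached);
	}

btrfs_try_lock_extent_bits() is the non-blocking variant: on contention it rolls back whatever prefix of the range it managed to lock and returns false instead of sleeping.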

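One last illustrative sketch (hypothetical demo_ names; btrfs's real module init lives elsewhere in the tree): the slab cache created by btrfs_extent_state_init_cachep() must exist before any extent state is allocated, and btrfs_extent_state_free_cachep() is the teardown counterpart:

	static int __init demo_btrfs_init(void)
	{
		int ret;

		/* Create the slab cache backing all extent_state allocations. */
		ret = btrfs_extent_state_init_cachep();
		if (ret)
			return ret;

		/* ... register the filesystem, create other caches ... */
		return 0;
	}

	static void __exit demo_btrfs_exit(void)
	{
		/* Also runs the leak check under CONFIG_BTRFS_DEBUG. */
		btrfs_extent_state_free_cachep();
	}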