summaryrefslogtreecommitdiff
path: root/fs/btrfs/space-info.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2019-06-18 16:09:25 -0400
committerDavid Sterba <dsterba@suse.com>2019-07-02 12:30:53 +0200
commit0d9764f6d0fb9dd4d4b773b481f259c0567870c2 (patch)
tree0961e6ea42d1c7b3eb462739f643ea440df09fec /fs/btrfs/space-info.c
parent5da6afeb32e97f956aa3d599b7f94ceb36fcf854 (diff)
btrfs: move reserve_metadata_bytes and supporting code to space-info.c
This moves all of the metadata reservation code into space-info.c. Reviewed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/space-info.c')
-rw-r--r--fs/btrfs/space-info.c698
1 files changed, 698 insertions, 0 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 41dfb1d4ea86..1ac58d7e7790 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -5,6 +5,9 @@
#include "sysfs.h"
#include "volumes.h"
#include "free-space-cache.h"
+#include "ordered-data.h"
+#include "transaction.h"
+#include "math.h"
u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
bool may_use_included)
@@ -401,3 +404,698 @@ again:
goto again;
up_read(&info->groups_sem);
}
+
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
+ unsigned long nr_pages, int nr_items)
+{
+ struct super_block *sb = fs_info->sb;
+
+ if (down_read_trylock(&sb->s_umount)) {
+ writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
+ up_read(&sb->s_umount);
+ } else {
+ /*
+ * We needn't worry the filesystem going from r/w to r/o though
+ * we don't acquire ->s_umount mutex, because the filesystem
+ * should guarantee the delalloc inodes list be empty after
+ * the filesystem is readonly(all dirty pages are written to
+ * the disk).
+ */
+ btrfs_start_delalloc_roots(fs_info, nr_items);
+ if (!current->journal_info)
+ btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
+ }
+}
+
+static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
+ u64 to_reclaim)
+{
+ u64 bytes;
+ u64 nr;
+
+ bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+ nr = div64_u64(to_reclaim, bytes);
+ if (!nr)
+ nr = 1;
+ return nr;
+}
+
+#define EXTENT_SIZE_PER_ITEM SZ_256K
+
+/*
+ * shrink metadata reservation for delalloc
+ */
+static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
+ u64 orig, bool wait_ordered)
+{
+ struct btrfs_space_info *space_info;
+ struct btrfs_trans_handle *trans;
+ u64 delalloc_bytes;
+ u64 dio_bytes;
+ u64 async_pages;
+ u64 items;
+ long time_left;
+ unsigned long nr_pages;
+ int loops;
+
+ /* Calc the number of the pages we need flush for space reservation */
+ items = calc_reclaim_items_nr(fs_info, to_reclaim);
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+ delalloc_bytes = percpu_counter_sum_positive(
+ &fs_info->delalloc_bytes);
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+ if (delalloc_bytes == 0 && dio_bytes == 0) {
+ if (trans)
+ return;
+ if (wait_ordered)
+ btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
+ return;
+ }
+
+ /*
+ * If we are doing more ordered than delalloc we need to just wait on
+ * ordered extents, otherwise we'll waste time trying to flush delalloc
+ * that likely won't give us the space back we need.
+ */
+ if (dio_bytes > delalloc_bytes)
+ wait_ordered = true;
+
+ loops = 0;
+ while ((delalloc_bytes || dio_bytes) && loops < 3) {
+ nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
+
+ /*
+ * Triggers inode writeback for up to nr_pages. This will invoke
+ * ->writepages callback and trigger delalloc filling
+ * (btrfs_run_delalloc_range()).
+ */
+ btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
+
+ /*
+ * We need to wait for the compressed pages to start before
+ * we continue.
+ */
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
+ goto skip_async;
+
+ /*
+ * Calculate how many compressed pages we want to be written
+ * before we continue. I.e if there are more async pages than we
+ * require wait_event will wait until nr_pages are written.
+ */
+ if (async_pages <= nr_pages)
+ async_pages = 0;
+ else
+ async_pages -= nr_pages;
+
+ wait_event(fs_info->async_submit_wait,
+ atomic_read(&fs_info->async_delalloc_pages) <=
+ (int)async_pages);
+skip_async:
+ spin_lock(&space_info->lock);
+ if (list_empty(&space_info->tickets) &&
+ list_empty(&space_info->priority_tickets)) {
+ spin_unlock(&space_info->lock);
+ break;
+ }
+ spin_unlock(&space_info->lock);
+
+ loops++;
+ if (wait_ordered && !trans) {
+ btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
+ } else {
+ time_left = schedule_timeout_killable(1);
+ if (time_left)
+ break;
+ }
+ delalloc_bytes = percpu_counter_sum_positive(
+ &fs_info->delalloc_bytes);
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+ }
+}
+
+/**
+ * maybe_commit_transaction - possibly commit the transaction if its ok to
+ * @root - the root we're allocating for
+ * @bytes - the number of bytes we want to reserve
+ * @force - force the commit
+ *
+ * This will check to make sure that committing the transaction will actually
+ * get us somewhere and then commit the transaction if it does. Otherwise it
+ * will return -ENOSPC.
+ */
+static int may_commit_transaction(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info)
+{
+ struct reserve_ticket *ticket = NULL;
+ struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_trans_handle *trans;
+ u64 bytes_needed;
+ u64 reclaim_bytes = 0;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
+ if (trans)
+ return -EAGAIN;
+
+ spin_lock(&space_info->lock);
+ if (!list_empty(&space_info->priority_tickets))
+ ticket = list_first_entry(&space_info->priority_tickets,
+ struct reserve_ticket, list);
+ else if (!list_empty(&space_info->tickets))
+ ticket = list_first_entry(&space_info->tickets,
+ struct reserve_ticket, list);
+ bytes_needed = (ticket) ? ticket->bytes : 0;
+ spin_unlock(&space_info->lock);
+
+ if (!bytes_needed)
+ return 0;
+
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * See if there is enough pinned space to make this reservation, or if
+ * we have block groups that are going to be freed, allowing us to
+ * possibly do a chunk allocation the next loop through.
+ */
+ if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
+ __percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes_needed,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
+ goto commit;
+
+ /*
+ * See if there is some space in the delayed insertion reservation for
+ * this reservation.
+ */
+ if (space_info != delayed_rsv->space_info)
+ goto enospc;
+
+ spin_lock(&delayed_rsv->lock);
+ reclaim_bytes += delayed_rsv->reserved;
+ spin_unlock(&delayed_rsv->lock);
+
+ spin_lock(&delayed_refs_rsv->lock);
+ reclaim_bytes += delayed_refs_rsv->reserved;
+ spin_unlock(&delayed_refs_rsv->lock);
+ if (reclaim_bytes >= bytes_needed)
+ goto commit;
+ bytes_needed -= reclaim_bytes;
+
+ if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes_needed,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
+ goto enospc;
+
+commit:
+ return btrfs_commit_transaction(trans);
+enospc:
+ btrfs_end_transaction(trans);
+ return -ENOSPC;
+}
+
+/*
+ * Try to flush some data based on policy set by @state. This is only advisory
+ * and may fail for various reasons. The caller is supposed to examine the
+ * state of @space_info to detect the outcome.
+ */
+static void flush_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info, u64 num_bytes,
+ int state)
+{
+ struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_trans_handle *trans;
+ int nr;
+ int ret = 0;
+
+ switch (state) {
+ case FLUSH_DELAYED_ITEMS_NR:
+ case FLUSH_DELAYED_ITEMS:
+ if (state == FLUSH_DELAYED_ITEMS_NR)
+ nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
+ else
+ nr = -1;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ ret = btrfs_run_delayed_items_nr(trans, nr);
+ btrfs_end_transaction(trans);
+ break;
+ case FLUSH_DELALLOC:
+ case FLUSH_DELALLOC_WAIT:
+ shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
+ state == FLUSH_DELALLOC_WAIT);
+ break;
+ case FLUSH_DELAYED_REFS_NR:
+ case FLUSH_DELAYED_REFS:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ if (state == FLUSH_DELAYED_REFS_NR)
+ nr = calc_reclaim_items_nr(fs_info, num_bytes);
+ else
+ nr = 0;
+ btrfs_run_delayed_refs(trans, nr);
+ btrfs_end_transaction(trans);
+ break;
+ case ALLOC_CHUNK:
+ case ALLOC_CHUNK_FORCE:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ ret = btrfs_chunk_alloc(trans,
+ btrfs_metadata_alloc_profile(fs_info),
+ (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
+ CHUNK_ALLOC_FORCE);
+ btrfs_end_transaction(trans);
+ if (ret > 0 || ret == -ENOSPC)
+ ret = 0;
+ break;
+ case COMMIT_TRANS:
+ /*
+ * If we have pending delayed iputs then we could free up a
+ * bunch of pinned space, so make sure we run the iputs before
+ * we do our pinned bytes check below.
+ */
+ btrfs_run_delayed_iputs(fs_info);
+ btrfs_wait_on_delayed_iputs(fs_info);
+
+ ret = may_commit_transaction(fs_info, space_info);
+ break;
+ default:
+ ret = -ENOSPC;
+ break;
+ }
+
+ trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
+ ret);
+ return;
+}
+
+static inline u64
+btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ bool system_chunk)
+{
+ struct reserve_ticket *ticket;
+ u64 used;
+ u64 expected;
+ u64 to_reclaim = 0;
+
+ list_for_each_entry(ticket, &space_info->tickets, list)
+ to_reclaim += ticket->bytes;
+ list_for_each_entry(ticket, &space_info->priority_tickets, list)
+ to_reclaim += ticket->bytes;
+ if (to_reclaim)
+ return to_reclaim;
+
+ to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
+ if (btrfs_can_overcommit(fs_info, space_info, to_reclaim,
+ BTRFS_RESERVE_FLUSH_ALL, system_chunk))
+ return 0;
+
+ used = btrfs_space_info_used(space_info, true);
+
+ if (btrfs_can_overcommit(fs_info, space_info, SZ_1M,
+ BTRFS_RESERVE_FLUSH_ALL, system_chunk))
+ expected = div_factor_fine(space_info->total_bytes, 95);
+ else
+ expected = div_factor_fine(space_info->total_bytes, 90);
+
+ if (used > expected)
+ to_reclaim = used - expected;
+ else
+ to_reclaim = 0;
+ to_reclaim = min(to_reclaim, space_info->bytes_may_use +
+ space_info->bytes_reserved);
+ return to_reclaim;
+}
+
+static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 used, bool system_chunk)
+{
+ u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+
+ /* If we're just plain full then async reclaim just slows us down. */
+ if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
+ return 0;
+
+ if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ system_chunk))
+ return 0;
+
+ return (used >= thresh && !btrfs_fs_closing(fs_info) &&
+ !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
+}
+
+static bool wake_all_tickets(struct list_head *head)
+{
+ struct reserve_ticket *ticket;
+
+ while (!list_empty(head)) {
+ ticket = list_first_entry(head, struct reserve_ticket, list);
+ list_del_init(&ticket->list);
+ ticket->error = -ENOSPC;
+ wake_up(&ticket->wait);
+ if (ticket->bytes != ticket->orig_bytes)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * This is for normal flushers, we can wait all goddamned day if we want to. We
+ * will loop and continuously try to flush as long as we are making progress.
+ * We count progress as clearing off tickets each time we have to loop.
+ */
+static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_space_info *space_info;
+ u64 to_reclaim;
+ int flush_state;
+ int commit_cycles = 0;
+ u64 last_tickets_id;
+
+ fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+ spin_lock(&space_info->lock);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
+ if (!to_reclaim) {
+ space_info->flush = 0;
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ last_tickets_id = space_info->tickets_id;
+ spin_unlock(&space_info->lock);
+
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ do {
+ flush_space(fs_info, space_info, to_reclaim, flush_state);
+ spin_lock(&space_info->lock);
+ if (list_empty(&space_info->tickets)) {
+ space_info->flush = 0;
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
+ space_info,
+ false);
+ if (last_tickets_id == space_info->tickets_id) {
+ flush_state++;
+ } else {
+ last_tickets_id = space_info->tickets_id;
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ if (commit_cycles)
+ commit_cycles--;
+ }
+
+ /*
+ * We don't want to force a chunk allocation until we've tried
+ * pretty hard to reclaim space. Think of the case where we
+ * freed up a bunch of space and so have a lot of pinned space
+ * to reclaim. We would rather use that than possibly create a
+ * underutilized metadata chunk. So if this is our first run
+ * through the flushing state machine skip ALLOC_CHUNK_FORCE and
+ * commit the transaction. If nothing has changed the next go
+ * around then we can force a chunk allocation.
+ */
+ if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+ flush_state++;
+
+ if (flush_state > COMMIT_TRANS) {
+ commit_cycles++;
+ if (commit_cycles > 2) {
+ if (wake_all_tickets(&space_info->tickets)) {
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ commit_cycles--;
+ } else {
+ space_info->flush = 0;
+ }
+ } else {
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ }
+ }
+ spin_unlock(&space_info->lock);
+ } while (flush_state <= COMMIT_TRANS);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+ INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
+static const enum btrfs_flush_state priority_flush_states[] = {
+ FLUSH_DELAYED_ITEMS_NR,
+ FLUSH_DELAYED_ITEMS,
+ ALLOC_CHUNK,
+};
+
+static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+{
+ u64 to_reclaim;
+ int flush_state;
+
+ spin_lock(&space_info->lock);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
+ if (!to_reclaim) {
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ spin_unlock(&space_info->lock);
+
+ flush_state = 0;
+ do {
+ flush_space(fs_info, space_info, to_reclaim,
+ priority_flush_states[flush_state]);
+ flush_state++;
+ spin_lock(&space_info->lock);
+ if (ticket->bytes == 0) {
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ spin_unlock(&space_info->lock);
+ } while (flush_state < ARRAY_SIZE(priority_flush_states));
+}
+
+static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+
+{
+ DEFINE_WAIT(wait);
+ u64 reclaim_bytes = 0;
+ int ret = 0;
+
+ spin_lock(&space_info->lock);
+ while (ticket->bytes > 0 && ticket->error == 0) {
+ ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+ if (ret) {
+ ret = -EINTR;
+ break;
+ }
+ spin_unlock(&space_info->lock);
+
+ schedule();
+
+ finish_wait(&ticket->wait, &wait);
+ spin_lock(&space_info->lock);
+ }
+ if (!ret)
+ ret = ticket->error;
+ if (!list_empty(&ticket->list))
+ list_del_init(&ticket->list);
+ if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
+ reclaim_bytes = ticket->orig_bytes - ticket->bytes;
+ spin_unlock(&space_info->lock);
+
+ if (reclaim_bytes)
+ btrfs_space_info_add_old_bytes(fs_info, space_info,
+ reclaim_bytes);
+ return ret;
+}
+
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @space_info - the space info we want to allocate from
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * This will reserve orig_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
+{
+ struct reserve_ticket ticket;
+ u64 used;
+ u64 reclaim_bytes = 0;
+ int ret = 0;
+
+ ASSERT(orig_bytes);
+ ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
+
+ spin_lock(&space_info->lock);
+ ret = -ENOSPC;
+ used = btrfs_space_info_used(space_info, true);
+
+ /*
+ * If we have enough space then hooray, make our reservation and carry
+ * on. If not see if we can overcommit, and if we can, hooray carry on.
+ * If not things get more complicated.
+ */
+ if (used + orig_bytes <= space_info->total_bytes) {
+ btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+ orig_bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags, orig_bytes, 1);
+ ret = 0;
+ } else if (btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush,
+ system_chunk)) {
+ btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+ orig_bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags, orig_bytes, 1);
+ ret = 0;
+ }
+
+ /*
+ * If we couldn't make a reservation then setup our reservation ticket
+ * and kick the async worker if it's not already running.
+ *
+ * If we are a priority flusher then we just need to add our ticket to
+ * the list and we will do our own flushing further down.
+ */
+ if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
+ ticket.orig_bytes = orig_bytes;
+ ticket.bytes = orig_bytes;
+ ticket.error = 0;
+ init_waitqueue_head(&ticket.wait);
+ if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+ list_add_tail(&ticket.list, &space_info->tickets);
+ if (!space_info->flush) {
+ space_info->flush = 1;
+ trace_btrfs_trigger_flush(fs_info,
+ space_info->flags,
+ orig_bytes, flush,
+ "enospc");
+ queue_work(system_unbound_wq,
+ &fs_info->async_reclaim_work);
+ }
+ } else {
+ list_add_tail(&ticket.list,
+ &space_info->priority_tickets);
+ }
+ } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ used += orig_bytes;
+ /*
+ * We will do the space reservation dance during log replay,
+ * which means we won't have fs_info->fs_root set, so don't do
+ * the async reclaim as we will panic.
+ */
+ if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
+ need_do_async_reclaim(fs_info, space_info,
+ used, system_chunk) &&
+ !work_busy(&fs_info->async_reclaim_work)) {
+ trace_btrfs_trigger_flush(fs_info, space_info->flags,
+ orig_bytes, flush, "preempt");
+ queue_work(system_unbound_wq,
+ &fs_info->async_reclaim_work);
+ }
+ }
+ spin_unlock(&space_info->lock);
+ if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
+ return ret;
+
+ if (flush == BTRFS_RESERVE_FLUSH_ALL)
+ return wait_reserve_ticket(fs_info, space_info, &ticket);
+
+ ret = 0;
+ priority_reclaim_metadata_space(fs_info, space_info, &ticket);
+ spin_lock(&space_info->lock);
+ if (ticket.bytes) {
+ if (ticket.bytes < orig_bytes)
+ reclaim_bytes = orig_bytes - ticket.bytes;
+ list_del_init(&ticket.list);
+ ret = -ENOSPC;
+ }
+ spin_unlock(&space_info->lock);
+
+ if (reclaim_bytes)
+ btrfs_space_info_add_old_bytes(fs_info, space_info,
+ reclaim_bytes);
+ ASSERT(list_empty(&ticket.list));
+ return ret;
+}
+
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * This will reserve orig_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ int ret;
+ bool system_chunk = (root == fs_info->chunk_root);
+
+ ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
+ orig_bytes, flush, system_chunk);
+ if (ret == -ENOSPC &&
+ unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
+ if (block_rsv != global_rsv &&
+ !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes))
+ ret = 0;
+ }
+ if (ret == -ENOSPC) {
+ trace_btrfs_space_reservation(fs_info, "space_info:enospc",
+ block_rsv->space_info->flags,
+ orig_bytes, 1);
+
+ if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+ btrfs_dump_space_info(fs_info, block_rsv->space_info,
+ orig_bytes, 0);
+ }
+ return ret;
+}