diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 331 |
1 files changed, 179 insertions, 152 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c1618ab9fecf..e08d0d45af4f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -27,7 +27,7 @@ #include <linux/ratelimit.h> #include <linux/percpu_counter.h> #include <linux/lockdep.h> -#include "hash.h" +#include <linux/crc32c.h> #include "tree-log.h" #include "disk-io.h" #include "print-tree.h" @@ -535,13 +535,11 @@ static noinline void caching_thread(struct btrfs_work *work) struct btrfs_block_group_cache *block_group; struct btrfs_fs_info *fs_info; struct btrfs_caching_control *caching_ctl; - struct btrfs_root *extent_root; int ret; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; fs_info = block_group->fs_info; - extent_root = fs_info->extent_root; mutex_lock(&caching_ctl->mutex); down_read(&fs_info->commit_root_sem); @@ -1203,11 +1201,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) __le64 lenum; lenum = cpu_to_le64(root_objectid); - high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); + high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(owner); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(offset); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); return ((u64)high_crc << 31) ^ (u64)low_crc; } @@ -2652,9 +2650,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, * Returns -ENOMEM or -EIO on failure and will abort the transaction. */ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, unsigned long nr) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; struct btrfs_delayed_ref_head *locked_ref = NULL; @@ -2994,7 +2992,7 @@ static void delayed_ref_async_start(struct btrfs_work *work) if (trans->transid > async->transid) goto end; - ret = btrfs_run_delayed_refs(trans, fs_info, async->count); + ret = btrfs_run_delayed_refs(trans, async->count); if (ret) async->error = ret; end: @@ -3053,8 +3051,9 @@ int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, * Returns <0 on error and aborts the transaction */ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, unsigned long count) + unsigned long count) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_head *head; @@ -3078,7 +3077,7 @@ again: delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); #endif trans->can_flush_pending_bgs = false; - ret = __btrfs_run_delayed_refs(trans, fs_info, count); + ret = __btrfs_run_delayed_refs(trans, count); if (ret < 0) { btrfs_abort_transaction(trans, ret); return ret; @@ -3086,7 +3085,7 @@ again: if (run_all) { if (!list_empty(&trans->new_bgs)) - btrfs_create_pending_block_groups(trans, fs_info); + btrfs_create_pending_block_groups(trans); spin_lock(&delayed_refs->lock); node = rb_first(&delayed_refs->href_root); @@ -3660,9 +3659,9 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, * the commit latency by getting rid of the easy block groups while * we're still allowing others to join the commit. */ -int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_cache *cache; struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; @@ -3686,7 +3685,7 @@ again: * make sure all the block groups on our dirty list actually * exist */ - btrfs_create_pending_block_groups(trans, fs_info); + btrfs_create_pending_block_groups(trans); if (!path) { path = btrfs_alloc_path(); @@ -3741,8 +3740,9 @@ again: should_put = 0; /* - * the cache_write_mutex is protecting - * the io_list + * The cache_write_mutex is protecting the + * io_list, also refer to the definition of + * btrfs_transaction::io_bgs for more details */ list_add_tail(&cache->io_list, io); } else { @@ -3800,7 +3800,7 @@ again: * go through delayed refs for all the stuff we've just kicked off * and then loop back (just once) */ - ret = btrfs_run_delayed_refs(trans, fs_info, 0); + ret = btrfs_run_delayed_refs(trans, 0); if (!ret && loops == 0) { loops++; spin_lock(&cur_trans->dirty_bgs_lock); @@ -3882,7 +3882,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, cache_save_setup(cache, trans, path); if (!ret) - ret = btrfs_run_delayed_refs(trans, fs_info, + ret = btrfs_run_delayed_refs(trans, (unsigned long) -1); if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { @@ -3934,6 +3934,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, } spin_unlock(&cur_trans->dirty_bgs_lock); + /* + * Refer to the definition of io_bgs member for details why it's safe + * to use it without any locking + */ while (!list_empty(io)) { cache = list_first_entry(io, struct btrfs_block_group_cache, io_list); @@ -3990,7 +3994,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) bg = btrfs_lookup_block_group(fs_info, bytenr); ASSERT(bg); if (atomic_dec_and_test(&bg->nocow_writers)) - wake_up_atomic_t(&bg->nocow_writers); + wake_up_var(&bg->nocow_writers); /* * Once for our lookup and once for the lookup done by a previous call * to btrfs_inc_nocow_writers() @@ -4001,8 +4005,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) { - wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait, - TASK_UNINTERRUPTIBLE); + wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); } static const char *alloc_name(u64 flags) @@ -4333,8 +4336,7 @@ again: /* commit the current transaction and try again */ commit_trans: - if (need_commit && - !atomic_read(&fs_info->open_ioctl_trans)) { + if (need_commit) { need_commit--; if (need_commit > 0) { @@ -4542,7 +4544,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, * Needed because we can end up allocating a system chunk and for an * atomic and race free space reservation in the chunk block reserve. */ - ASSERT(mutex_is_locked(&fs_info->chunk_mutex)); + lockdep_assert_held(&fs_info->chunk_mutex); info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); spin_lock(&info->lock); @@ -4603,11 +4605,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, return -ENOSPC; space_info = __find_space_info(fs_info, flags); - if (!space_info) { - ret = create_space_info(fs_info, flags, &space_info); - if (ret) - return ret; - } + ASSERT(space_info); again: spin_lock(&space_info->lock); @@ -4706,7 +4704,7 @@ out: */ if (trans->can_flush_pending_bgs && trans->chunk_bytes_reserved >= (u64)SZ_2M) { - btrfs_create_pending_block_groups(trans, fs_info); + btrfs_create_pending_block_groups(trans); btrfs_trans_release_chunk_metadata(trans); } return ret; @@ -4827,7 +4825,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, long time_left; unsigned long nr_pages; int loops; - enum btrfs_reserve_flush_enum flush; /* Calc the number of the pages we need flush for space reservation */ items = calc_reclaim_items_nr(fs_info, to_reclaim); @@ -4868,10 +4865,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, atomic_read(&fs_info->async_delalloc_pages) <= (int)max_reclaim); skip_async: - if (!trans) - flush = BTRFS_RESERVE_FLUSH_ALL; - else - flush = BTRFS_RESERVE_NO_FLUSH; spin_lock(&space_info->lock); if (list_empty(&space_info->tickets) && list_empty(&space_info->priority_tickets)) { @@ -4994,7 +4987,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, ret = PTR_ERR(trans); break; } - ret = btrfs_run_delayed_items_nr(trans, fs_info, nr); + ret = btrfs_run_delayed_items_nr(trans, nr); btrfs_end_transaction(trans); break; case FLUSH_DELALLOC: @@ -5389,10 +5382,15 @@ static int reserve_metadata_bytes(struct btrfs_root *root, !block_rsv_use_bytes(global_rsv, orig_bytes)) ret = 0; } - if (ret == -ENOSPC) + if (ret == -ENOSPC) { trace_btrfs_space_reservation(fs_info, "space_info:enospc", block_rsv->space_info->flags, orig_bytes, 1); + + if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) + dump_space_info(fs_info, block_rsv->space_info, + orig_bytes, 0); + } return ret; } @@ -5761,6 +5759,9 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, if (num_bytes == 0) return 0; + ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + if (ret) + return ret; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (!ret) { block_rsv_add_bytes(block_rsv, num_bytes, 0); @@ -5773,11 +5774,15 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, /** * btrfs_inode_rsv_release - release any excessive reservation. * @inode - the inode we need to release from. + * @qgroup_free - free or convert qgroup meta. + * Unlike normal operation, qgroup meta reservation needs to know if we are + * freeing qgroup reservation or just converting it into per-trans. Normally + * @qgroup_free is true for error handling, and false for normal release. * * This is the same as btrfs_block_rsv_release, except that it handles the * tracepoint for the reservation. */ -static void btrfs_inode_rsv_release(struct btrfs_inode *inode) +static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; @@ -5793,6 +5798,10 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode) if (released > 0) trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), released, 0); + if (qgroup_free) + btrfs_qgroup_free_meta_prealloc(inode->root, released); + else + btrfs_qgroup_convert_reserved_meta(inode->root, released); } void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, @@ -5893,24 +5902,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) WARN_ON(fs_info->delayed_block_rsv.reserved > 0); } -void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - if (!trans->block_rsv) { - ASSERT(!trans->bytes_reserved); - return; - } - - if (!trans->bytes_reserved) - return; - - ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); - trace_btrfs_space_reservation(fs_info, "transaction", - trans->transid, trans->bytes_reserved, 0); - btrfs_block_rsv_release(fs_info, trans->block_rsv, - trans->bytes_reserved); - trans->bytes_reserved = 0; -} /* * To be called after all the new block groups attached to the transaction @@ -5952,7 +5943,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, */ u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), + trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), num_bytes, 1); return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } @@ -5996,7 +5987,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { /* One for parent inode, two for dir entries */ num_bytes = 3 * fs_info->nodesize; - ret = btrfs_qgroup_reserve_meta(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); if (ret) return ret; } else { @@ -6015,7 +6006,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1); if (ret && *qgroup_reserved) - btrfs_qgroup_free_meta(root, *qgroup_reserved); + btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved); return ret; } @@ -6052,7 +6043,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); - struct btrfs_root *root = inode->root; unsigned nr_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; @@ -6069,13 +6059,13 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) if (btrfs_is_free_space_inode(inode)) { flush = BTRFS_RESERVE_NO_FLUSH; delalloc_lock = false; - } else if (current->journal_info) { - flush = BTRFS_RESERVE_FLUSH_LIMIT; - } + } else { + if (current->journal_info) + flush = BTRFS_RESERVE_FLUSH_LIMIT; - if (flush != BTRFS_RESERVE_NO_FLUSH && - btrfs_transaction_in_commit(fs_info)) - schedule_timeout(1); + if (btrfs_transaction_in_commit(fs_info)) + schedule_timeout(1); + } if (delalloc_lock) mutex_lock(&inode->delalloc_mutex); @@ -6090,19 +6080,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { - ret = btrfs_qgroup_reserve_meta(root, - nr_extents * fs_info->nodesize, true); - if (ret) - goto out_fail; - } - ret = btrfs_inode_rsv_refill(inode, flush); - if (unlikely(ret)) { - btrfs_qgroup_free_meta(root, - nr_extents * fs_info->nodesize); + if (unlikely(ret)) goto out_fail; - } if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); @@ -6116,7 +6096,7 @@ out_fail: btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); - btrfs_inode_rsv_release(inode); + btrfs_inode_rsv_release(inode, true); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return ret; @@ -6126,12 +6106,14 @@ out_fail: * btrfs_delalloc_release_metadata - release a metadata reservation for an inode * @inode: the inode to release the reservation for. * @num_bytes: the number of bytes we are releasing. + * @qgroup_free: free qgroup reservation or convert it to per-trans reservation * * This will release the metadata reservation for an inode. This can be called * once we complete IO for a given set of bytes to release their metadata * reservations, or on error for the same reason. */ -void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, + bool qgroup_free) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); @@ -6144,13 +6126,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) if (btrfs_is_testing(fs_info)) return; - btrfs_inode_rsv_release(inode); + btrfs_inode_rsv_release(inode, qgroup_free); } /** * btrfs_delalloc_release_extents - release our outstanding_extents * @inode: the inode to balance the reservation for. * @num_bytes: the number of bytes we originally reserved with + * @qgroup_free: do we need to free qgroup meta reservation or convert them. * * When we reserve space we increase outstanding_extents for the extents we may * add. Once we've set the range as delalloc or created our ordered extents we @@ -6158,7 +6141,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) * temporarily tracked outstanding_extents. This _must_ be used in conjunction * with btrfs_delalloc_reserve_metadata. */ -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, + bool qgroup_free) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); unsigned num_extents; @@ -6172,7 +6156,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) if (btrfs_is_testing(fs_info)) return; - btrfs_inode_rsv_release(inode); + btrfs_inode_rsv_release(inode, qgroup_free); } /** @@ -6228,9 +6212,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode, */ void btrfs_delalloc_release_space(struct inode *inode, struct extent_changeset *reserved, - u64 start, u64 len) + u64 start, u64 len, bool qgroup_free) { - btrfs_delalloc_release_metadata(BTRFS_I(inode), len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free); btrfs_free_reserved_data_space(inode, reserved, start, len); } @@ -6526,7 +6510,7 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, bg = btrfs_lookup_block_group(fs_info, start); ASSERT(bg); if (atomic_dec_and_test(&bg->reservations)) - wake_up_atomic_t(&bg->reservations); + wake_up_var(&bg->reservations); btrfs_put_block_group(bg); } @@ -6552,8 +6536,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) down_write(&space_info->groups_sem); up_write(&space_info->groups_sem); - wait_on_atomic_t(&bg->reservations, atomic_t_wait, - TASK_UNINTERRUPTIBLE); + wait_var_event(&bg->reservations, !atomic_read(&bg->reservations)); } /** @@ -6785,9 +6768,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_cache *block_group, *tmp; struct list_head *deleted_bgs; struct extent_io_tree *unpin; @@ -7353,29 +7336,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) return ret; } -int __get_raid_index(u64 flags) -{ - if (flags & BTRFS_BLOCK_GROUP_RAID10) - return BTRFS_RAID_RAID10; - else if (flags & BTRFS_BLOCK_GROUP_RAID1) - return BTRFS_RAID_RAID1; - else if (flags & BTRFS_BLOCK_GROUP_DUP) - return BTRFS_RAID_DUP; - else if (flags & BTRFS_BLOCK_GROUP_RAID0) - return BTRFS_RAID_RAID0; - else if (flags & BTRFS_BLOCK_GROUP_RAID5) - return BTRFS_RAID_RAID5; - else if (flags & BTRFS_BLOCK_GROUP_RAID6) - return BTRFS_RAID_RAID6; - - return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ -} - -int get_block_group_index(struct btrfs_block_group_cache *cache) -{ - return __get_raid_index(cache->flags); -} - static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = "raid10", [BTRFS_RAID_RAID1] = "raid1", @@ -7490,7 +7450,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, u64 empty_cluster = 0; struct btrfs_space_info *space_info; int loop = 0; - int index = __get_raid_index(flags); + int index = btrfs_bg_flags_to_raid_index(flags); bool failed_cluster_refill = false; bool failed_alloc = false; bool use_cluster = true; @@ -7576,7 +7536,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, btrfs_put_block_group(block_group); up_read(&space_info->groups_sem); } else { - index = get_block_group_index(block_group); + index = btrfs_bg_flags_to_raid_index( + block_group->flags); btrfs_lock_block_group(block_group, delalloc); goto have_block_group; } @@ -7586,7 +7547,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, } search: have_caching_bg = false; - if (index == 0 || index == __get_raid_index(flags)) + if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags)) full_search = true; down_read(&space_info->groups_sem); list_for_each_entry(block_group, &space_info->block_groups[index], @@ -7844,7 +7805,8 @@ checks: loop: failed_cluster_refill = false; failed_alloc = false; - BUG_ON(index != get_block_group_index(block_group)); + BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) != + index); btrfs_release_block_group(block_group, delalloc); cond_resched(); } @@ -7998,6 +7960,51 @@ again: up_read(&info->groups_sem); } +/* + * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a + * hole that is at least as big as @num_bytes. + * + * @root - The root that will contain this extent + * + * @ram_bytes - The amount of space in ram that @num_bytes take. This + * is used for accounting purposes. This value differs + * from @num_bytes only in the case of compressed extents. + * + * @num_bytes - Number of bytes to allocate on-disk. + * + * @min_alloc_size - Indicates the minimum amount of space that the + * allocator should try to satisfy. In some cases + * @num_bytes may be larger than what is required and if + * the filesystem is fragmented then allocation fails. + * However, the presence of @min_alloc_size gives a + * chance to try and satisfy the smaller allocation. + * + * @empty_size - A hint that you plan on doing more COW. This is the + * size in bytes the allocator should try to find free + * next to the block it returns. This is just a hint and + * may be ignored by the allocator. + * + * @hint_byte - Hint to the allocator to start searching above the byte + * address passed. It might be ignored. + * + * @ins - This key is modified to record the found hole. It will + * have the following values: + * ins->objectid == start position + * ins->flags = BTRFS_EXTENT_ITEM_KEY + * ins->offset == the size of the hole. + * + * @is_data - Boolean flag indicating whether an extent is + * allocated for data (true) or metadata (false) + * + * @delalloc - Boolean flag indicating whether this allocation is for + * delalloc or not. If 'true' data_rwsem of block groups + * is going to be acquired. + * + * + * Returns 0 when an allocation succeeded or < 0 when an error occurred. In + * case -ENOSPC is returned then @ins->offset will contain the size of the + * largest available hole the allocator managed to find. + */ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, @@ -8701,6 +8708,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 parent; u32 blocksize; struct btrfs_key key; + struct btrfs_key first_key; struct extent_buffer *next; int level = wc->level; int reada = 0; @@ -8721,6 +8729,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); + btrfs_node_key_to_cpu(path->nodes[level], &first_key, + path->slots[level]); blocksize = fs_info->nodesize; next = find_extent_buffer(fs_info, bytenr); @@ -8785,7 +8795,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (!next) { if (reada && level == 1) reada_walk_down(trans, root, wc, path); - next = read_tree_block(fs_info, bytenr, generation); + next = read_tree_block(fs_info, bytenr, generation, level - 1, + &first_key); if (IS_ERR(next)) { return PTR_ERR(next); } else if (!extent_buffer_uptodate(next)) { @@ -9650,7 +9661,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) */ target = get_restripe_target(fs_info, block_group->flags); if (target) { - index = __get_raid_index(extended_to_chunk(target)); + index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target)); } else { /* * this is just a balance, so if we were marked as full @@ -9664,7 +9675,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) goto out; } - index = get_block_group_index(block_group); + index = btrfs_bg_flags_to_raid_index(block_group->flags); } if (index == BTRFS_RAID_RAID10) { @@ -9913,10 +9924,40 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +/* link_block_group will queue up kobjects to add when we're reclaim-safe */ +void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *space_info; + struct raid_kobject *rkobj; + LIST_HEAD(list); + int index; + int ret = 0; + + spin_lock(&fs_info->pending_raid_kobjs_lock); + list_splice_init(&fs_info->pending_raid_kobjs, &list); + spin_unlock(&fs_info->pending_raid_kobjs_lock); + + list_for_each_entry(rkobj, &list, list) { + space_info = __find_space_info(fs_info, rkobj->flags); + index = btrfs_bg_flags_to_raid_index(rkobj->flags); + + ret = kobject_add(&rkobj->kobj, &space_info->kobj, + "%s", get_raid_name(index)); + if (ret) { + kobject_put(&rkobj->kobj); + break; + } + } + if (ret) + btrfs_warn(fs_info, + "failed to add kobject for block cache, ignoring"); +} + static void link_block_group(struct btrfs_block_group_cache *cache) { struct btrfs_space_info *space_info = cache->space_info; - int index = get_block_group_index(cache); + struct btrfs_fs_info *fs_info = cache->fs_info; + int index = btrfs_bg_flags_to_raid_index(cache->flags); bool first = false; down_write(&space_info->groups_sem); @@ -9926,27 +9967,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache) up_write(&space_info->groups_sem); if (first) { - struct raid_kobject *rkobj; - int ret; - - rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); - if (!rkobj) - goto out_err; - rkobj->raid_type = index; - kobject_init(&rkobj->kobj, &btrfs_raid_ktype); - ret = kobject_add(&rkobj->kobj, &space_info->kobj, - "%s", get_raid_name(index)); - if (ret) { - kobject_put(&rkobj->kobj); - goto out_err; + struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); + if (!rkobj) { + btrfs_warn(cache->fs_info, + "couldn't alloc memory for raid level kobject"); + return; } + rkobj->flags = cache->flags; + kobject_init(&rkobj->kobj, &btrfs_raid_ktype); + + spin_lock(&fs_info->pending_raid_kobjs_lock); + list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs); + spin_unlock(&fs_info->pending_raid_kobjs_lock); space_info->block_group_kobjs[index] = &rkobj->kobj; } - - return; -out_err: - btrfs_warn(cache->fs_info, - "failed to add kobject for block cache, ignoring"); } static struct btrfs_block_group_cache * @@ -10162,6 +10196,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) inc_block_group_ro(cache, 1); } + btrfs_add_raid_kobjects(info); init_global_block_rsv(info); ret = 0; error: @@ -10169,9 +10204,9 @@ error: return ret; } -void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_cache *block_group, *tmp; struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_item item; @@ -10256,15 +10291,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, * with its ->space_info set. */ cache->space_info = __find_space_info(fs_info, cache->flags); - if (!cache->space_info) { - ret = create_space_info(fs_info, cache->flags, - &cache->space_info); - if (ret) { - btrfs_remove_free_space_cache(cache); - btrfs_put_block_group(cache); - return ret; - } - } + ASSERT(cache->space_info); ret = btrfs_add_block_group_cache(fs_info, cache); if (ret) { @@ -10336,7 +10363,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->key.offset); memcpy(&key, &block_group->key, sizeof(key)); - index = get_block_group_index(block_group); + index = btrfs_bg_flags_to_raid_index(block_group->flags); if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) @@ -11061,7 +11088,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) ret = btrfs_start_write_no_snapshotting(root); if (ret) break; - wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait, - TASK_UNINTERRUPTIBLE); + wait_var_event(&root->will_be_snapshotted, + !atomic_read(&root->will_be_snapshotted)); } } |