From 3c17916510428dbccdf657de050c34e208347089 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Feb 2021 10:26:54 +0200 Subject: btrfs: fix race between extent freeing/allocation when using bitmaps During allocation the allocator will try to allocate an extent using cluster policy. Once the current cluster is exhausted it will remove the entry under btrfs_free_cluster::lock and subsequently acquire btrfs_free_space_ctl::tree_lock to dispose of the already-deleted entry and adjust btrfs_free_space_ctl::total_bitmap. This poses a problem because there exists a race condition between removing the entry under one lock and doing the necessary accounting holding a different lock since extent freeing only uses the 2nd lock. This can result in the following situation: T1: T2: btrfs_alloc_from_cluster insert_into_bitmap if (entry->bytes == 0) if (block_group && !list_empty(&block_group->cluster_list)) { rb_erase(entry) spin_unlock(&cluster->lock); (total_bitmaps is still 4) spin_lock(&cluster->lock); root> spin_lock(&ctl->tree_lock); recalculate_thresholds To fix this ensure that once depleted, the cluster entry is deleted when both cluster lock and tree locks are held in the allocator (T1), this ensures that even if there is a race with a concurrent insert_into_bitmap call it will correctly find the entry in the cluster and add the new space to it. CC: # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5400294bd271..abcf951e6b44 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3125,8 +3125,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, entry->bytes -= bytes; } - if (entry->bytes == 0) - rb_erase(&entry->offset_index, &cluster->root); break; } out: @@ -3143,7 +3141,10 @@ out: ctl->free_space -= bytes; if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; + + spin_lock(&cluster->lock); if (entry->bytes == 0) { + rb_erase(&entry->offset_index, &cluster->root); ctl->free_extents--; if (entry->bitmap) { kmem_cache_free(btrfs_free_space_bitmap_cachep, @@ -3156,6 +3157,7 @@ out: kmem_cache_free(btrfs_free_space_cachep, entry); } + spin_unlock(&cluster->lock); spin_unlock(&ctl->tree_lock); return ret; -- cgit From 95c85fba1f64c3249c67f0078a29f8a125078189 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Jan 2021 16:42:35 -0500 Subject: btrfs: avoid double put of block group when emptying cluster It's wrong calling btrfs_put_block_group in __btrfs_return_cluster_to_free_space if the block group passed is different than the block group the cluster represents. As this means the cluster doesn't have a reference to the passed block group. This results in double put and a use-after-free bug. Fix this by simply bailing if the block group we passed in does not match the block group on the cluster. Fixes: fa9c0d795f7b ("Btrfs: rework allocation clustering") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index abcf951e6b44..711a6a751ae9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2801,8 +2801,10 @@ static void __btrfs_return_cluster_to_free_space( struct rb_node *node; spin_lock(&cluster->lock); - if (cluster->block_group != block_group) - goto out; + if (cluster->block_group != block_group) { + spin_unlock(&cluster->lock); + return; + } cluster->block_group = NULL; cluster->window_start = 0; @@ -2840,8 +2842,6 @@ static void __btrfs_return_cluster_to_free_space( entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; - -out: spin_unlock(&cluster->lock); btrfs_put_block_group(block_group); } -- cgit