| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-10-04 10:05:13 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-10-04 10:05:13 -0700 | 
| commit | 79eb2c07afbe4d165734ea61a258dd8410ec6624 (patch) | |
| tree | 40144fe701942830adcece5474846015afe87361 | |
| parent | b7a838ee7e8904c14e5d6ca2d0029bbad70fb761 (diff) | |
| parent | d6e7ac65d4c106149d08a0ffba39fc516ae3d21b (diff) | |
Merge tag 'for-6.12-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 - in incremental send, fix invalid clone operation for file that got
   its size decreased
 - fix __counted_by() annotation of send path cache entries, as we do not
   store the terminating NUL (see the sketch after this list)
 - fix a longstanding bug in relocation (quite hard to hit by chance):
   drop the back reference cache that can get out of sync after a
   transaction commit
 - wait for fixup worker kthread before finishing umount
 - add missing raid-stripe-tree extent for NOCOW files; zoned mode
   cannot have NOCOW files, but RST is meant to be a standalone feature
 - handle transaction start error during relocation to avoid a potential
   NULL pointer dereference of the relocation control structure (reported
   by syzbot)
 - disable module-wide rate limiting of debug level messages
 - minor fix to tracepoint definition (reported by checkpatch.pl)
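The __counted_by() item above comes down to a small but easy-to-miss pattern: the cached name is stored without a terminating NUL, so the counter must cover exactly the stored bytes, the allocation must not reserve an extra byte, and the copy must be a memcpy() rather than strcpy(). Below is a minimal sketch of that pattern; the struct and helper names are illustrative, not the actual btrfs send code (the real change is in the fs/btrfs/send.c hunk further down).

```c
#include <linux/slab.h>
#include <linux/string.h>

/* Illustrative entry with a length-counted, non-NUL-terminated name. */
struct name_entry {
	int name_len;					/* length without NUL */
	char name[] __counted_by(name_len) __nonstring;	/* not NUL terminated */
};

static struct name_entry *name_entry_alloc(const char *src, int len)
{
	/* No "+ 1": the array holds exactly name_len bytes. */
	struct name_entry *e = kmalloc(sizeof(*e) + len, GFP_KERNEL);

	if (!e)
		return NULL;
	/* Set the bound before writing name[] so fortified checks see it. */
	e->name_len = len;
	memcpy(e->name, src, len);	/* no terminating NUL stored */
	return e;
}
```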
* tag 'for-6.12-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: disable rate limiting when debug enabled
  btrfs: wait for fixup workers before stopping cleaner kthread during umount
  btrfs: fix a NULL pointer dereference when failed to start a new transaction
  btrfs: send: fix invalid clone operation for file that got its size decreased
  btrfs: tracepoints: end assignment with semicolon at btrfs_qgroup_extent event class
  btrfs: drop the backref cache during relocation if we commit
  btrfs: also add stripe entries for NOCOW writes
  btrfs: send: fix buffer overflow detection when copying path to cache entry
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fs/btrfs/backref.c | 12 |
| -rw-r--r-- | fs/btrfs/disk-io.c | 11 |
| -rw-r--r-- | fs/btrfs/inode.c | 5 |
| -rw-r--r-- | fs/btrfs/messages.c | 3 |
| -rw-r--r-- | fs/btrfs/relocation.c | 77 |
| -rw-r--r-- | fs/btrfs/send.c | 31 |
| -rw-r--r-- | include/trace/events/btrfs.h | 2 |
7 files changed, 58 insertions, 83 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e2f478ecd7fd..f8e1d5b2c512 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -3179,10 +3179,14 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
 		btrfs_backref_cleanup_node(cache, node);
 	}
 
-	cache->last_trans = 0;
-
-	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
-		ASSERT(list_empty(&cache->pending[i]));
+	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+		while (!list_empty(&cache->pending[i])) {
+			node = list_first_entry(&cache->pending[i],
+						struct btrfs_backref_node,
+						list);
+			btrfs_backref_cleanup_node(cache, node);
+		}
+	}
 	ASSERT(list_empty(&cache->pending_edge));
 	ASSERT(list_empty(&cache->useless_node));
 	ASSERT(list_empty(&cache->changed));
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 831fb901683c..4ad5db619b00 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4256,6 +4256,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
 	btrfs_cleanup_defrag_inodes(fs_info);
 
 	/*
+	 * Wait for any fixup workers to complete.
+	 * If we don't wait for them here and they are still running by the time
+	 * we call kthread_stop() against the cleaner kthread further below, we
+	 * get an use-after-free on the cleaner because the fixup worker adds an
+	 * inode to the list of delayed iputs and then attempts to wakeup the
+	 * cleaner kthread, which was already stopped and destroyed. We parked
+	 * already the cleaner, but below we run all pending delayed iputs.
+	 */
+	btrfs_flush_workqueue(fs_info->fixup_workers);
+
+	/*
 	 * After we parked the cleaner kthread, ordered extents may have
 	 * completed and created new delayed iputs. If one of the async reclaim
 	 * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9122afcb712c..5618ca02934a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3111,6 +3111,11 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
 		ret = btrfs_update_inode_fallback(trans, inode);
 		if (ret) /* -ENOMEM or corruption */
 			btrfs_abort_transaction(trans, ret);
+
+		ret = btrfs_insert_raid_extent(trans, ordered_extent);
+		if (ret)
+			btrfs_abort_transaction(trans, ret);
+
 		goto out;
 	}
 
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 77752eec125d..363fd28c0268 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -239,7 +239,8 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	if (__ratelimit(ratelimit)) {
+	/* Do not ratelimit if CONFIG_BTRFS_DEBUG is enabled. */
+	if (IS_ENABLED(CONFIG_BTRFS_DEBUG) || __ratelimit(ratelimit)) {
 		if (fs_info) {
 			char statestr[STATE_STRING_BUF_LEN];
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ea4ed85919ec..f3834f8d26b4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -232,70 +232,6 @@ static struct btrfs_backref_node *walk_down_backref(
 	return NULL;
 }
 
-static void update_backref_node(struct btrfs_backref_cache *cache,
-				struct btrfs_backref_node *node, u64 bytenr)
-{
-	struct rb_node *rb_node;
-	rb_erase(&node->rb_node, &cache->rb_root);
-	node->bytenr = bytenr;
-	rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
-	if (rb_node)
-		btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
-}
-
-/*
- * update backref cache after a transaction commit
- */
-static int update_backref_cache(struct btrfs_trans_handle *trans,
-				struct btrfs_backref_cache *cache)
-{
-	struct btrfs_backref_node *node;
-	int level = 0;
-
-	if (cache->last_trans == 0) {
-		cache->last_trans = trans->transid;
-		return 0;
-	}
-
-	if (cache->last_trans == trans->transid)
-		return 0;
-
-	/*
-	 * detached nodes are used to avoid unnecessary backref
-	 * lookup. transaction commit changes the extent tree.
-	 * so the detached nodes are no longer useful.
-	 */
-	while (!list_empty(&cache->detached)) {
-		node = list_entry(cache->detached.next,
-				  struct btrfs_backref_node, list);
-		btrfs_backref_cleanup_node(cache, node);
-	}
-
-	while (!list_empty(&cache->changed)) {
-		node = list_entry(cache->changed.next,
-				  struct btrfs_backref_node, list);
-		list_del_init(&node->list);
-		BUG_ON(node->pending);
-		update_backref_node(cache, node, node->new_bytenr);
-	}
-
-	/*
-	 * some nodes can be left in the pending list if there were
-	 * errors during processing the pending nodes.
-	 */
-	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
-		list_for_each_entry(node, &cache->pending[level], list) {
-			BUG_ON(!node->pending);
-			if (node->bytenr == node->new_bytenr)
-				continue;
-			update_backref_node(cache, node, node->new_bytenr);
-		}
-	}
-
-	cache->last_trans = 0;
-	return 1;
-}
-
 static bool reloc_root_is_dead(const struct btrfs_root *root)
 {
 	/*
@@ -551,9 +487,6 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
 	struct btrfs_backref_edge *new_edge;
 	struct rb_node *rb_node;
 
-	if (cache->last_trans > 0)
-		update_backref_cache(trans, cache);
-
 	rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
 	if (rb_node) {
 		node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
@@ -923,7 +856,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 	btrfs_grab_root(reloc_root);
 
 	/* root->reloc_root will stay until current relocation finished */
-	if (fs_info->reloc_ctl->merge_reloc_tree &&
+	if (fs_info->reloc_ctl && fs_info->reloc_ctl->merge_reloc_tree &&
 	    btrfs_root_refs(root_item) == 0) {
 		set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
 		/*
@@ -3698,11 +3631,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 			break;
 		}
 restart:
-		if (update_backref_cache(trans, &rc->backref_cache)) {
-			btrfs_end_transaction(trans);
-			trans = NULL;
-			continue;
-		}
+		if (rc->backref_cache.last_trans != trans->transid)
+			btrfs_backref_release_cache(&rc->backref_cache);
+		rc->backref_cache.last_trans = trans->transid;
 
 		ret = find_next_extent(rc, path, &key);
 		if (ret < 0)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7f48ba6c1c77..27306d98ec43 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -346,8 +346,10 @@ struct name_cache_entry {
 	u64 parent_gen;
 	int ret;
 	int need_later_update;
+	/* Name length without NUL terminator. */
 	int name_len;
-	char name[] __counted_by(name_len);
+	/* Not NUL terminated. */
+	char name[] __counted_by(name_len) __nonstring;
 };
 
 /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
@@ -2388,7 +2390,7 @@ out_cache:
 	/*
 	 * Store the result of the lookup in the name cache.
 	 */
-	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
+	nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL);
 	if (!nce) {
 		ret = -ENOMEM;
 		goto out;
@@ -2400,7 +2402,7 @@ out_cache:
 	nce->parent_gen = *parent_gen;
 	nce->name_len = fs_path_len(dest);
 	nce->ret = ret;
-	strcpy(nce->name, dest->start);
+	memcpy(nce->name, dest->start, nce->name_len);
 
 	if (ino < sctx->send_progress)
 		nce->need_later_update = 0;
@@ -6187,8 +6189,29 @@ static int send_write_or_clone(struct send_ctx *sctx,
 	if (ret < 0)
 		return ret;
 
-	if (clone_root->offset + num_bytes == info.size)
+	if (clone_root->offset + num_bytes == info.size) {
+		/*
+		 * The final size of our file matches the end offset, but it may
+		 * be that its current size is larger, so we have to truncate it
+		 * to any value between the start offset of the range and the
+		 * final i_size, otherwise the clone operation is invalid
+		 * because it's unaligned and it ends before the current EOF.
+		 * We do this truncate to the final i_size when we finish
+		 * processing the inode, but it's too late by then. And here we
+		 * truncate to the start offset of the range because it's always
+		 * sector size aligned while if it were the final i_size it
+		 * would result in dirtying part of a page, filling part of a
+		 * page with zeroes and then having the clone operation at the
+		 * receiver trigger IO and wait for it due to the dirty page.
+		 */
+		if (sctx->parent_root != NULL) {
+			ret = send_truncate(sctx, sctx->cur_ino,
+					    sctx->cur_inode_gen, offset);
+			if (ret < 0)
+				return ret;
+		}
 		goto clone_data;
+	}
 
 write_data:
 	ret = send_extent_data(sctx, path, offset, num_bytes);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index bf60ad50011e..af6b3827fb1d 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1716,7 +1716,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 	),
 
 	TP_fast_assign_btrfs(fs_info,
-		__entry->bytenr		= rec->bytenr,
+		__entry->bytenr		= rec->bytenr;
 		__entry->num_bytes	= rec->num_bytes;
 	),
 
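The relocation.c changes above delete update_backref_cache(), which tried to rewrite cached bytenrs after a transaction commit, and instead drop the whole backref cache whenever the transaction id changes. A minimal sketch of that invalidation pattern follows; the type and function names are illustrative, not the btrfs backref cache API.

```c
#include <linux/types.h>

/* Illustrative cache whose contents are only valid for one transaction. */
struct example_cache {
	u64 last_trans;	/* transid the cached state was built against */
	/* ... nodes keyed by bytenr, valid only for that transaction ... */
};

/* Free every cached node; rebuilding later is cheaper than repairing. */
static void example_cache_release(struct example_cache *cache)
{
	/* walk and free all cached nodes here */
}

/* Call with the current transaction id before using the cache. */
static void example_cache_revalidate(struct example_cache *cache, u64 transid)
{
	if (cache->last_trans != transid)
		example_cache_release(cache);
	cache->last_trans = transid;
}
```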

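For the messages.c hunk, the pattern is to short-circuit the ratelimit check when the debug config option is built in, so debug kernels never drop messages. A self-contained sketch under that assumption, with made-up state and function names (the real change keys on CONFIG_BTRFS_DEBUG inside _btrfs_printk()):

```c
#include <linux/kconfig.h>
#include <linux/printk.h>
#include <linux/ratelimit.h>

static DEFINE_RATELIMIT_STATE(example_rs, DEFAULT_RATELIMIT_INTERVAL,
			      DEFAULT_RATELIMIT_BURST);

static void example_log(const char *msg)
{
	/*
	 * IS_ENABLED() folds to 1 at compile time on debug builds, so
	 * __ratelimit() is never consulted and nothing is suppressed.
	 */
	if (IS_ENABLED(CONFIG_BTRFS_DEBUG) || __ratelimit(&example_rs))
		pr_info("example: %s\n", msg);
}
```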