diff options
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 11 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 74 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 7 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 26 | 
6 files changed, 114 insertions, 12 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7a7521c87c88..8a42adb4e5ed 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,6 +44,17 @@  #define BTRFS_INODE_IN_DELALLOC_LIST		9  #define BTRFS_INODE_READDIO_NEED_LOCK		10  #define BTRFS_INODE_HAS_PROPS		        11 +/* + * The following 3 bits are meant only for the btree inode. + * When any of them is set, it means an error happened while writing an + * extent buffer belonging to: + * 1) a non-log btree + * 2) a log btree and first log sub-transaction + * 3) a log btree and second log sub-transaction + */ +#define BTRFS_INODE_BTREE_ERR		        12 +#define BTRFS_INODE_BTREE_LOG1_ERR		13 +#define BTRFS_INODE_BTREE_LOG2_ERR		14  /* in memory btrfs inode */  struct btrfs_inode { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7b..09b3c8a0c790 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -607,7 +607,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,  		goto err;  	eb->read_mirror = mirror; -	if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { +	if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {  		ret = -EIO;  		goto err;  	} @@ -680,7 +680,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)  	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;  	eb = (struct extent_buffer *)page->private; -	set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); +	set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);  	eb->read_mirror = failed_mirror;  	atomic_dec(&eb->io_pages);  	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 44d04979f071..8ebe6bf66e78 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7235,17 +7235,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,  	btrfs_set_buffer_uptodate(buf);  	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { +		buf->log_index = 
root->log_transid % 2;  		/*  		 * we allow two log transactions at a time, use different  		 * EXENT bit to differentiate dirty pages.  		 */ -		if (root->log_transid % 2 == 0) +		if (buf->log_index == 0)  			set_extent_dirty(&root->dirty_log_pages, buf->start,  					buf->start + buf->len - 1, GFP_NOFS);  		else  			set_extent_new(&root->dirty_log_pages, buf->start,  					buf->start + buf->len - 1, GFP_NOFS);  	} else { +		buf->log_index = -1;  		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,  			 buf->start + buf->len - 1, GFP_NOFS);  	} diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4267a054b9c1..215603b911f1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3601,6 +3601,68 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)  	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);  } +static void set_btree_ioerr(struct page *page) +{ +	struct extent_buffer *eb = (struct extent_buffer *)page->private; +	struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); + +	SetPageError(page); +	if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) +		return; + +	/* +	 * If writeback for a btree extent failed, record the error in the btree +	 * inode's runtime_flags (see the switch on eb->log_index below). +	 * We do this because while the transaction is running and before it's +	 * committing (when we call filemap_fdata[write|wait]_range against +	 * the btree inode), we might have +	 * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it +	 * returns an error or an error happens during writeback, when we're +	 * committing the transaction we wouldn't know about it, since the pages +	 * can be no longer dirty nor marked anymore for writeback (if a +	 * subsequent modification to the extent buffer didn't happen before the +	 * transaction commit), which makes filemap_fdata[write|wait]_range not +	 * able to find the pages tagged with SetPageError at transaction +	 * commit time. 
So if this happens we must abort the transaction, +	 * otherwise we commit a super block with btree roots that point to +	 * btree nodes/leafs whose content on disk is invalid - either garbage +	 * or the content of some node/leaf from a past generation that got +	 * cowed or deleted and is no longer valid. +	 * +	 * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would +	 * not be enough - we need to distinguish between log tree extents vs +	 * non-log tree extents, and the next filemap_fdatawait_range() call +	 * will catch and clear such errors in the mapping - and that call might +	 * be from a log sync and not from a transaction commit. Also, checking +	 * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is +	 * not done and would not be reliable - the eb might have been released +	 * from memory and reading it back again means that flag would not be +	 * set (since it's a runtime flag, not persisted on disk). +	 * +	 * Using the flags below in the btree inode also makes us achieve the +	 * goal of AS_EIO/AS_ENOSPC when writepages() returns success, started +	 * writeback for all dirty pages and before filemap_fdatawait_range() +	 * is called, the writeback for all dirty pages had already finished +	 * with errors - because we were not using AS_EIO/AS_ENOSPC, +	 * filemap_fdatawait_range() would return success, as it could not know +	 * that writeback errors happened (the pages were no longer tagged for +	 * writeback). 
+	 */ +	switch (eb->log_index) { +	case -1: +		set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); +		break; +	case 0: +		set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); +		break; +	case 1: +		set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); +		break; +	default: +		BUG(); /* unexpected, logic error */ +	} +} +  static void end_bio_extent_buffer_writepage(struct bio *bio, int err)  {  	struct bio_vec *bvec; @@ -3614,10 +3676,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)  		BUG_ON(!eb);  		done = atomic_dec_and_test(&eb->io_pages); -		if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { -			set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); +		if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {  			ClearPageUptodate(page); -			SetPageError(page); +			set_btree_ioerr(page);  		}  		end_page_writeback(page); @@ -3644,7 +3705,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,  	int rw = (epd->sync_io ? 
WRITE_SYNC : WRITE) | REQ_META;  	int ret = 0; -	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); +	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);  	num_pages = num_extent_pages(eb->start, eb->len);  	atomic_set(&eb->io_pages, num_pages);  	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) @@ -3661,8 +3722,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,  					 0, epd->bio_flags, bio_flags);  		epd->bio_flags = bio_flags;  		if (ret) { -			set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); -			SetPageError(p); +			set_btree_ioerr(p);  			end_page_writeback(p);  			if (atomic_sub_and_test(num_pages - i, &eb->io_pages))  				end_extent_buffer_writeback(eb); @@ -5055,7 +5115,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,  		goto unlock_exit;  	} -	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); +	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);  	eb->read_mirror = 0;  	atomic_set(&eb->io_pages, num_reads);  	for (i = start_i; i < num_pages; i++) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5e91fb9d1764..06f030c0084c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -41,9 +41,10 @@  #define EXTENT_BUFFER_TREE_REF 5  #define EXTENT_BUFFER_STALE 6  #define EXTENT_BUFFER_WRITEBACK 7 -#define EXTENT_BUFFER_IOERR 8 +#define EXTENT_BUFFER_READ_ERR 8        /* read IO error */  #define EXTENT_BUFFER_DUMMY 9  #define EXTENT_BUFFER_IN_TREE 10 +#define EXTENT_BUFFER_WRITE_ERR 11    /* write IO error */  /* these are flags for extent_clear_unlock_delalloc */  #define PAGE_UNLOCK		(1 << 0) @@ -141,7 +142,9 @@ struct extent_buffer {  	atomic_t blocking_readers;  	atomic_t spinning_readers;  	atomic_t spinning_writers; -	int lock_nested; +	short lock_nested; +	/* >= 0 if eb belongs to a log tree, -1 otherwise */ +	short log_index;  	/* protects write locks */  	rwlock_t lock; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 16d0c1b62b3e..a47b1000a6e5 100644 --- a/fs/btrfs/transaction.c +++ 
b/fs/btrfs/transaction.c @@ -851,6 +851,8 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,  	struct extent_state *cached_state = NULL;  	u64 start = 0;  	u64 end; +	struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); +	bool errors = false;  	while (!find_first_extent_bit(dirty_pages, start, &start, &end,  				      EXTENT_NEED_WAIT, &cached_state)) { @@ -864,6 +866,26 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,  	}  	if (err)  		werr = err; + +	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { +		if ((mark & EXTENT_DIRTY) && +		    test_and_clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, +				       &btree_ino->runtime_flags)) +			errors = true; + +		if ((mark & EXTENT_NEW) && +		    test_and_clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, +				       &btree_ino->runtime_flags)) +			errors = true; +	} else { +		if (test_and_clear_bit(BTRFS_INODE_BTREE_ERR, +				       &btree_ino->runtime_flags)) +			errors = true; +	} + +	if (errors && !werr) +		werr = -EIO; +  	return werr;  } @@ -1629,6 +1651,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  {  	struct btrfs_transaction *cur_trans = trans->transaction;  	struct btrfs_transaction *prev_trans = NULL; +	struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode);  	int ret;  	/* Stop the commit early if ->aborted is set */ @@ -1871,6 +1894,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	btrfs_update_commit_device_size(root->fs_info);  	btrfs_update_commit_device_bytes_used(root, cur_trans); +	clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); +	clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); +  	spin_lock(&root->fs_info->trans_lock);  	cur_trans->state = TRANS_STATE_UNBLOCKED;  	root->fs_info->running_transaction = NULL;  | 
