summary refs log tree commit diff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 128
1 files changed, 83 insertions, 45 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f0b6d1936e8..8da2f380d3c0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -279,6 +279,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
len = buf->len - offset;
while (len > 0) {
+ /*
+ * Note: we don't need to check for the err == 1 case here, as
+ * with the given combination of 'start = BTRFS_CSUM_SIZE (32)'
+ * and 'min_len = 32' and the currently implemented mapping
+ * algorithm we cannot cross a page boundary.
+ */
err = map_private_extent_buffer(buf, offset, 32,
&kaddr, &map_start, &map_len);
if (err)
@@ -477,9 +483,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
int mirror_num = 0;
int failed_mirror = 0;
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
+ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
mirror_num);
if (!ret) {
@@ -493,15 +499,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
break;
}
- /*
- * This buffer's crc is fine, but its contents are corrupted, so
- * there is no reason to read the other copies, they won't be
- * any less wrong.
- */
- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
- ret == -EUCLEAN)
- break;
-
num_copies = btrfs_num_copies(fs_info,
eb->start, eb->len);
if (num_copies == 1)
@@ -551,7 +548,7 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
if (WARN_ON(!PageUptodate(page)))
return -EUCLEAN;
- ASSERT(memcmp_extent_buffer(eb, fs_info->fsid,
+ ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
return csum_tree_block(fs_info, eb, 0);
@@ -566,7 +563,20 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
while (fs_devices) {
- if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
+ u8 *metadata_uuid;
+
+ /*
+ * Checking the incompat flag is only valid for the current
+ * fs. For seed devices it's forbidden to have their uuid
+ * changed, so reading ->fsid in this case is fine.
+ */
+ if (fs_devices == fs_info->fs_devices &&
+ btrfs_fs_incompat(fs_info, METADATA_UUID))
+ metadata_uuid = fs_devices->metadata_uuid;
+ else
+ metadata_uuid = fs_devices->fsid;
+
+ if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE)) {
ret = 0;
break;
}
@@ -669,19 +679,6 @@ out:
return ret;
}
-static int btree_io_failed_hook(struct page *page, int failed_mirror)
-{
- struct extent_buffer *eb;
-
- eb = (struct extent_buffer *)page->private;
- set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
- eb->read_mirror = failed_mirror;
- atomic_dec(&eb->io_pages);
- if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(eb, -EIO);
- return -EIO; /* we fixed nothing */
-}
-
static void end_workqueue_bio(struct bio *bio)
{
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
@@ -760,11 +757,22 @@ static void run_one_async_start(struct btrfs_work *work)
async->status = ret;
}
+/*
+ * In order to insert checksums into the metadata in large chunks, we wait
+ * until bio submission time. All the pages in the bio are checksummed and
+ * sums are attached onto the ordered extent record.
+ *
+ * At IO completion time the csums attached on the ordered extent record are
+ * inserted into the tree.
+ */
static void run_one_async_done(struct btrfs_work *work)
{
struct async_submit_bio *async;
+ struct inode *inode;
+ blk_status_t ret;
async = container_of(work, struct async_submit_bio, work);
+ inode = async->private_data;
/* If an error occurred we just want to clean up the bio and move on */
if (async->status) {
@@ -773,7 +781,12 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
- btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num);
+ ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio,
+ async->mirror_num, 1);
+ if (ret) {
+ async->bio->bi_status = ret;
+ bio_endio(async->bio);
+ }
}
static void run_one_async_free(struct btrfs_work *work)
@@ -1187,6 +1200,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
atomic_set(&root->snapshot_force_cow, 0);
+ atomic_set(&root->nr_swapfiles, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
@@ -2127,10 +2141,8 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
{
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
- rwlock_init(&fs_info->dev_replace.lock);
- atomic_set(&fs_info->dev_replace.blocking_readers, 0);
+ init_rwsem(&fs_info->dev_replace.rwsem);
init_waitqueue_head(&fs_info->dev_replace.replace_wait);
- init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
}
static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
@@ -2451,10 +2463,11 @@ static int validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
- if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+ if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
+ BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
- "dev_item UUID does not match fsid: %pU != %pU",
- fs_info->fsid, sb->dev_item.fsid);
+ "dev_item UUID does not match metadata fsid: %pU != %pU",
+ fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
ret = -EINVAL;
}
@@ -2665,6 +2678,9 @@ int open_ctree(struct super_block *sb,
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
BTRFS_BLOCK_RSV_DELOPS);
+ btrfs_init_block_rsv(&fs_info->delayed_refs_rsv,
+ BTRFS_BLOCK_RSV_DELREFS);
+
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
@@ -2754,6 +2770,9 @@ int open_ctree(struct super_block *sb,
fs_info->sectorsize = 4096;
fs_info->stripesize = 4096;
+ spin_lock_init(&fs_info->swapfile_pins_lock);
+ fs_info->swapfile_pins = RB_ROOT;
+
ret = btrfs_alloc_stripe_hash_table(fs_info);
if (ret) {
err = ret;
@@ -2790,11 +2809,29 @@ int open_ctree(struct super_block *sb,
* the whole block of INFO_SIZE
*/
memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
- memcpy(fs_info->super_for_commit, fs_info->super_copy,
- sizeof(*fs_info->super_for_commit));
brelse(bh);
- memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
+ disk_super = fs_info->super_copy;
+
+ ASSERT(!memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
+ BTRFS_FSID_SIZE));
+
+ if (btrfs_fs_incompat(fs_info, METADATA_UUID)) {
+ ASSERT(!memcmp(fs_info->fs_devices->metadata_uuid,
+ fs_info->super_copy->metadata_uuid,
+ BTRFS_FSID_SIZE));
+ }
+
+ features = btrfs_super_flags(disk_super);
+ if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
+ features &= ~BTRFS_SUPER_FLAG_CHANGING_FSID_V2;
+ btrfs_set_super_flags(disk_super, features);
+ btrfs_info(fs_info,
+ "found metadata UUID change in progress flag, clearing");
+ }
+
+ memcpy(fs_info->super_for_commit, fs_info->super_copy,
+ sizeof(*fs_info->super_for_commit));
ret = btrfs_validate_mount_super(fs_info);
if (ret) {
@@ -2803,7 +2840,6 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
- disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
goto fail_alloc;
@@ -2915,7 +2951,7 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE);
+ memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(fs_info);
@@ -3064,7 +3100,7 @@ retry_root_backup:
if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
- "writeable mount is not allowed due to too many missing devices");
+ "writable mount is not allowed due to too many missing devices");
goto fail_sysfs;
}
@@ -3733,7 +3769,8 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
btrfs_set_stack_device_io_width(dev_item, dev->io_width);
btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
- memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_FSID_SIZE);
+ memcpy(dev_item->fsid, dev->fs_devices->metadata_uuid,
+ BTRFS_FSID_SIZE);
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
@@ -4040,7 +4077,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
* This is a fast path so only do this check if we have sanity tests
- * enabled. Normal people shouldn't be using umapped buffers as dirty
+ * enabled. Normal people shouldn't be using unmapped buffers as dirty
* outside of the sanity tests.
*/
if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
@@ -4338,6 +4375,8 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
unpin = pinned_extents;
again:
while (1) {
+ struct extent_state *cached_state = NULL;
+
/*
* The btrfs_finish_extent_commit() may get the same range as
* ours between find_first_extent_bit and clear_extent_dirty.
@@ -4346,13 +4385,14 @@ again:
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, NULL);
+ EXTENT_DIRTY, &cached_state);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
- clear_extent_dirty(unpin, start, end);
+ clear_extent_dirty(unpin, start, end, &cached_state);
+ free_extent_state(cached_state);
btrfs_error_unpin_extent_range(fs_info, start, end);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
@@ -4409,6 +4449,7 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
spin_unlock(&cur_trans->dirty_bgs_lock);
btrfs_put_block_group(cache);
+ btrfs_delayed_refs_rsv_release(fs_info, 1);
spin_lock(&cur_trans->dirty_bgs_lock);
}
spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -4514,7 +4555,4 @@ static const struct extent_io_ops btree_extent_io_ops = {
/* mandatory callbacks */
.submit_bio_hook = btree_submit_bio_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
- .readpage_io_failed_hook = btree_io_failed_hook,
-
- /* optional callbacks */
};