summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c142
1 files changed, 92 insertions, 50 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 22a0439e5a86..a8f652dc940b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -45,6 +45,7 @@
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
+#include "dev-replace.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
break;
- num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+ num_copies = btrfs_num_copies(root->fs_info,
eb->start, eb->len);
if (num_copies == 1)
break;
@@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
+ int ret;
+
/*
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
*/
- return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+ if (ret)
+ bio_endio(bio, ret);
+ return ret;
}
static int check_async_write(struct inode *inode, unsigned long bio_flags)
@@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int ret;
if (!(rw & REQ_WRITE)) {
-
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
@@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
bio, 1);
if (ret)
- return ret;
- return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
+ goto out_w_error;
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+ mirror_num, 0);
} else if (!async) {
ret = btree_csum_one_bio(bio);
if (ret)
- return ret;
- return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
+ goto out_w_error;
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+ mirror_num, 0);
+ } else {
+ /*
+ * kthread helpers are used to submit writes so that
+ * checksumming can happen in parallel across all CPUs
+ */
+ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+ inode, rw, bio, mirror_num, 0,
+ bio_offset,
+ __btree_submit_bio_start,
+ __btree_submit_bio_done);
}
- /*
- * kthread helpers are used to submit writes so that checksumming
- * can happen in parallel across all CPUs
- */
- return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num, 0,
- bio_offset,
- __btree_submit_bio_start,
- __btree_submit_bio_done);
+ if (ret) {
+out_w_error:
+ bio_endio(bio, ret);
+ }
+ return ret;
}
#ifdef CONFIG_MIGRATION
@@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
static int btree_set_page_dirty(struct page *page)
{
+#ifdef DEBUG
struct extent_buffer *eb;
BUG_ON(!PagePrivate(page));
@@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)
BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(!atomic_read(&eb->refs));
btrfs_assert_tree_locked(eb);
+#endif
return __set_page_dirty_nobuffers(page);
}
@@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
root->fs_info->dirty_metadata_bytes);
}
spin_unlock(&root->fs_info->delalloc_lock);
- }
- /* ugh, clear_extent_buffer_dirty needs to lock the page */
- btrfs_set_lock_blocking(buf);
- clear_extent_buffer_dirty(buf);
+ /* ugh, clear_extent_buffer_dirty needs to lock the page */
+ btrfs_set_lock_blocking(buf);
+ clear_extent_buffer_dirty(buf);
+ }
}
}
@@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->root_key.objectid = objectid;
root->anon_dev = 0;
- spin_lock_init(&root->root_times_lock);
+ spin_lock_init(&root->root_item_lock);
}
static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->extent_commit_sem);
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
+ fs_info->dev_replace.lock_owner = 0;
+ atomic_set(&fs_info->dev_replace.nesting_level, 0);
+ mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+ mutex_init(&fs_info->dev_replace.lock_management_lock);
+ mutex_init(&fs_info->dev_replace.lock);
spin_lock_init(&fs_info->qgroup_lock);
fs_info->qgroup_tree = RB_ROOT;
@@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb,
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
+ fs_info->thread_pool_size,
+ &fs_info->generic_worker);
+
btrfs_init_workers(&fs_info->submit_workers, "submit",
min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size),
@@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->delayed_workers);
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
+ ret |= btrfs_start_workers(&fs_info->flush_workers);
if (ret) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb,
goto fail_tree_roots;
}
- btrfs_close_extra_devices(fs_devices);
+ /*
+ * keep the device that is marked to be the target device for the
+ * dev_replace procedure
+ */
+ btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
@@ -2490,6 +2517,14 @@ retry_root_backup:
goto fail_block_groups;
}
+ ret = btrfs_init_dev_replace(fs_info);
+ if (ret) {
+ pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+ goto fail_block_groups;
+ }
+
+ btrfs_close_extra_devices(fs_info, fs_devices, 1);
+
ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@@ -2503,6 +2538,13 @@ retry_root_backup:
}
fs_info->num_tolerated_disk_barrier_failures =
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
+ if (fs_info->fs_devices->missing_devices >
+ fs_info->num_tolerated_disk_barrier_failures &&
+ !(sb->s_flags & MS_RDONLY)) {
+ printk(KERN_WARNING
+ "Btrfs: too many missing devices, writeable mount is not allowed\n");
+ goto fail_block_groups;
+ }
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
@@ -2631,6 +2673,13 @@ retry_root_backup:
return ret;
}
+ ret = btrfs_resume_dev_replace_async(fs_info);
+ if (ret) {
+ pr_warn("btrfs: failed to resume dev_replace\n");
+ close_ctree(tree_root);
+ return ret;
+ }
+
return 0;
fail_qgroup:
@@ -2667,6 +2716,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
+ btrfs_stop_workers(&fs_info->flush_workers);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)
smp_mb();
/* pause restriper - we want to resume on mount */
- btrfs_pause_balance(root->fs_info);
+ btrfs_pause_balance(fs_info);
- btrfs_scrub_cancel(root);
+ btrfs_dev_replace_suspend_for_unmount(fs_info);
+
+ btrfs_scrub_cancel(fs_info);
/* wait for any defraggers to finish */
wait_event(fs_info->transaction_wait,
(atomic_read(&fs_info->defrag_running) == 0));
/* clear out the rbtree of defraggable inodes */
- btrfs_run_defrag_inodes(fs_info);
+ btrfs_cleanup_defrag_inodes(fs_info);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
@@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
+ btrfs_stop_workers(&fs_info->flush_workers);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
int was_dirty;
btrfs_assert_tree_locked(buf);
- if (transid != root->fs_info->generation) {
- printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
+ if (transid != root->fs_info->generation)
+ WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
(unsigned long long)buf->start,
(unsigned long long)transid,
(unsigned long long)root->fs_info->generation);
- WARN_ON(1);
- }
was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
@@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
}
}
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+ int flush_delayed)
{
/*
* looks as though older kernels can get into trouble with
@@ -3411,7 +3463,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
if (current->flags & PF_MEMALLOC)
return;
- btrfs_balance_delayed_items(root);
+ if (flush_delayed)
+ btrfs_balance_delayed_items(root);
num_dirty = root->fs_info->dirty_metadata_bytes;
@@ -3422,25 +3475,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
return;
}
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+void btrfs_btree_balance_dirty(struct btrfs_root *root)
{
- /*
- * looks as though older kernels can get into trouble with
- * this code, they end up stuck in balance_dirty_pages forever
- */
- u64 num_dirty;
- unsigned long thresh = 32 * 1024 * 1024;
-
- if (current->flags & PF_MEMALLOC)
- return;
-
- num_dirty = root->fs_info->dirty_metadata_bytes;
+ __btrfs_btree_balance_dirty(root, 1);
+}
- if (num_dirty > thresh) {
- balance_dirty_pages_ratelimited(
- root->fs_info->btree_inode->i_mapping);
- }
- return;
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
+{
+ __btrfs_btree_balance_dirty(root, 0);
}
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)