From d0082371cf086e0ba2bbd0367b2c9920532df24f Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 1 Mar 2012 14:57:19 +0100 Subject: btrfs: drop gfp_t from lock_extent lock_extent and unlock_extent are always called with GFP_NOFS, drop the argument and use GFP_NOFS consistently. Signed-off-by: Jeff Mahoney --- fs/btrfs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e8d06b6b9194..0eb80cc4ec81 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1105,8 +1105,7 @@ again: if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; lock_extent_bits(&BTRFS_I(inode)->io_tree, - start_pos, last_pos - 1, 0, &cached_state, - GFP_NOFS); + start_pos, last_pos - 1, 0, &cached_state); ordered = btrfs_lookup_first_ordered_extent(inode, last_pos - 1); if (ordered && @@ -1638,7 +1637,7 @@ static long btrfs_fallocate(struct file *file, int mode, * transaction */ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, - locked_end, 0, &cached_state, GFP_NOFS); + locked_end, 0, &cached_state); ordered = btrfs_lookup_first_ordered_extent(inode, alloc_end - 1); if (ordered && @@ -1737,7 +1736,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) return -ENXIO; lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, - &cached_state, GFP_NOFS); + &cached_state); /* * Delalloc is such a pain. If we have a hole and we have pending -- cgit From 79787eaab46121d4713ed03c8fc63b9ec3eaec76 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 12 Mar 2012 16:03:00 +0100 Subject: btrfs: replace many BUG_ONs with proper error handling btrfs currently handles most errors with BUG_ON. This patch is a work-in- progress but aims to handle most errors other than internal logic errors and ENOMEM more gracefully. This iteration prevents most crashes but can run into lockups with the page lock on occasion when the timing "works out." Signed-off-by: Jeff Mahoney --- fs/btrfs/file.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0eb80cc4ec81..d83260d7498f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -452,7 +452,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split = alloc_extent_map(); if (!split2) split2 = alloc_extent_map(); - BUG_ON(!split || !split2); + BUG_ON(!split || !split2); /* -ENOMEM */ write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); @@ -494,7 +494,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->flags = flags; split->compress_type = em->compress_type; ret = add_extent_mapping(em_tree, split); - BUG_ON(ret); + BUG_ON(ret); /* Logic error */ free_extent_map(split); split = split2; split2 = NULL; @@ -520,7 +520,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, } ret = add_extent_mapping(em_tree, split); - BUG_ON(ret); + BUG_ON(ret); /* Logic error */ free_extent_map(split); split = NULL; } @@ -679,7 +679,7 @@ next_slot: root->root_key.objectid, new_key.objectid, start - extent_offset, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ *hint_byte = disk_bytenr; } key.offset = start; @@ -754,7 +754,7 @@ next_slot: root->root_key.objectid, key.objectid, key.offset - extent_offset, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ inode_sub_bytes(inode, extent_end - key.offset); *hint_byte = disk_bytenr; @@ -770,7 +770,10 @@ next_slot: ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } del_nr = 0; del_slot = 0; @@ -782,11 +785,13 @@ next_slot: BUG_ON(1); } - if (del_nr > 0) { + if (!ret && del_nr > 0) { ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - BUG_ON(ret); + if (ret) + btrfs_abort_transaction(trans, root, ret); } +out: btrfs_free_path(path); return ret; } @@ -944,7 +949,10 @@ again: btrfs_release_path(path); goto again; } - BUG_ON(ret < 0); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0] - 1, @@ -963,7 +971,7 @@ again: ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ if (split == start) { key.offset = start; @@ -990,7 +998,7 @@ again: ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } other_start = 0; other_end = start; @@ -1007,7 +1015,7 @@ again: ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } if (del_nr == 0) { fi = btrfs_item_ptr(leaf, path->slots[0], @@ -1025,7 +1033,10 @@ again: btrfs_mark_buffer_dirty(leaf); ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - BUG_ON(ret); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } } out: btrfs_free_path(path); @@ -1666,7 +1677,13 @@ static long btrfs_fallocate(struct file *file, int mode, em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); - BUG_ON(IS_ERR_OR_NULL(em)); + if (IS_ERR_OR_NULL(em)) { + if (!em) + ret = -ENOMEM; + else + ret = PTR_ERR(em); + break; + } last_byte = min(extent_map_end(em), alloc_end); actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = (last_byte + mask) & ~mask; -- cgit From dc7fdde39e4962b1a88741f7eba2a6b3be1285d8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2012 14:31:29 -0400 Subject: Btrfs: reduce lock contention during extent insertion We're spending huge amounts of time on lock contention during end_io processing because we unconditionally assume we are overwriting an existing extent in the file for each IO. This checks to see if we are outside i_size, and if so, it uses a less expensive readonly search of the btree to look for existing extents. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d83260d7498f..53bf2d764bbc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, int extent_type; int recow; int ret; + int modify_tree = -1; if (drop_cache) btrfs_drop_extent_cache(inode, start, end - 1, 0); @@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, if (!path) return -ENOMEM; + if (start >= BTRFS_I(inode)->disk_i_size) + modify_tree = 0; + while (1) { recow = 0; ret = btrfs_lookup_file_extent(trans, root, path, ino, - search_start, -1); + search_start, modify_tree); if (ret < 0) break; if (ret > 0 && path->slots[0] > 0 && search_start == start) { @@ -634,7 +638,8 @@ next_slot: } search_start = max(key.offset, start); - if (recow) { + if (recow || !modify_tree) { + modify_tree = -1; btrfs_release_path(path); continue; } -- cgit From 0c4d2d95d06e920e0c61707e62c7fffc9c57f63a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 5 Apr 2012 15:03:02 -0400 Subject: Btrfs: use i_version instead of our own sequence We've been keeping around the inode sequence number in hopes that somebody would use it, but nobody uses it and people actually use i_version which serves the same purpose, so use i_version where we used the incore inode's sequence number and that way the sequence is updated properly across the board, and not just in file write. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc..8aa8d7fe74d7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1409,7 +1409,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, mutex_unlock(&inode->i_mutex); goto out; } - BTRFS_I(inode)->sequence++; start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { -- cgit From 0885ef5b5601e9b007c383e77c172769b1f214fd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 23 Apr 2012 15:09:39 -0400 Subject: Btrfs: do not do filemap_write_and_wait_range in fsync We already do the btrfs_wait_ordered_range which will do this for us, so just remove this call so we don't call it twice. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8aa8d7fe74d7..cfc0ab915d03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1497,14 +1497,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_btrfs_sync_file(file, datasync); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; mutex_lock(&inode->i_mutex); - /* we wait first, since the writeback may change the inode */ + /* + * we wait first, since the writeback may change the inode, also wait + * ordered range does a filemape_write_and_wait_range which is why we + * don't do it above like other file systems. + */ root->log_batch++; - btrfs_wait_ordered_range(inode, 0, (u64)-1); + btrfs_wait_ordered_range(inode, start, end); root->log_batch++; /* -- cgit From 72ac3c0d7921f943d92d1ef42a549fb52e56817d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 23 May 2012 14:13:11 -0400 Subject: Btrfs: convert the inode bit field to use the actual bit operations Miao pointed this out while I was working on an orphan problem that messing with a bitfield where different ranges are protected by different locks doesn't work out right. Turns out we've been doing this forever where we have different parts of the bit field protected by either no lock at all or different locks which could cause all sorts of weird problems including the issue I was hitting. So instead make a runtime_flags thing that we use the normal bit operations on that are all atomic so we can keep having our no/different locking for the different flags and then make force_compress it's own thing so it can be treated normally. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cfc0ab915d03..c9005f216975 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -103,7 +103,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, goto exists; } } - BTRFS_I(inode)->in_defrag = 1; + set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); return; @@ -131,7 +131,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (btrfs_fs_closing(root->fs_info)) return 0; - if (BTRFS_I(inode)->in_defrag) + if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) return 0; if (trans) @@ -148,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, defrag->root = root->root_key.objectid; spin_lock(&root->fs_info->defrag_inodes_lock); - if (!BTRFS_I(inode)->in_defrag) + if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) __btrfs_add_inode_defrag(inode, defrag); else kfree(defrag); @@ -252,7 +252,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) goto next; /* do a chunk of defrag */ - BTRFS_I(inode)->in_defrag = 0; + clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); range.start = defrag->last_offset; num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, defrag_batch); @@ -1465,8 +1465,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * flush down new bytes that may have been written if the * application were using truncate to replace a file in place. */ - if (BTRFS_I(inode)->ordered_data_close) { - BTRFS_I(inode)->ordered_data_close = 0; + if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, + &BTRFS_I(inode)->runtime_flags)) { btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(inode->i_mapping); -- cgit From 762f2263260d576504aeb23d20f90120acdb025f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 24 May 2012 18:58:27 +0800 Subject: Btrfs: fix the same inode id problem when doing auto defragment Two files in the different subvolumes may have the same inode id, so The rb-tree which is used to manage the defragment object must take it into account. This patch fix this problem. Signed-off-by: Miao Xie --- fs/btrfs/file.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c9005f216975..2e63cdc2b093 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -65,6 +65,21 @@ struct inode_defrag { int cycled; }; +static int __compare_inode_defrag(struct inode_defrag *defrag1, + struct inode_defrag *defrag2) +{ + if (defrag1->root > defrag2->root) + return 1; + else if (defrag1->root < defrag2->root) + return -1; + else if (defrag1->ino > defrag2->ino) + return 1; + else if (defrag1->ino < defrag2->ino) + return -1; + else + return 0; +} + /* pop a record for an inode into the defrag tree. The lock * must be held already * @@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *entry; struct rb_node **p; struct rb_node *parent = NULL; + int ret; p = &root->fs_info->defrag_inodes.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct inode_defrag, rb_node); - if (defrag->ino < entry->ino) + ret = __compare_inode_defrag(defrag, entry); + if (ret < 0) p = &parent->rb_left; - else if (defrag->ino > entry->ino) + else if (ret > 0) p = &parent->rb_right; else { /* if we're reinserting an entry for @@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, /* * must be called with the defrag_inodes lock held */ -struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, +struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, + u64 root, u64 ino, struct rb_node **next) { struct inode_defrag *entry = NULL; + struct inode_defrag tmp; struct rb_node *p; struct rb_node *parent = NULL; + int ret; + + tmp.ino = ino; + tmp.root = root; p = info->defrag_inodes.rb_node; while (p) { parent = p; entry = rb_entry(parent, struct inode_defrag, rb_node); - if (ino < entry->ino) + ret = __compare_inode_defrag(&tmp, entry); + if (ret < 0) p = parent->rb_left; - else if (ino > entry->ino) + else if (ret > 0) p = parent->rb_right; else return entry; } if (next) { - while (parent && ino > entry->ino) { + while (parent && __compare_inode_defrag(&tmp, entry) > 0) { parent = rb_next(parent); entry = rb_entry(parent, struct inode_defrag, rb_node); } @@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) struct btrfs_key key; struct btrfs_ioctl_defrag_range_args range; u64 first_ino = 0; + u64 root_objectid = 0; int num_defrag; int defrag_batch = 1024; @@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) n = NULL; /* find an inode to defrag */ - defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); + defrag = btrfs_find_defrag_inode(fs_info, root_objectid, + first_ino, &n); if (!defrag) { - if (n) - defrag = rb_entry(n, struct inode_defrag, rb_node); - else if (first_ino) { + if (n) { + defrag = rb_entry(n, struct inode_defrag, + rb_node); + } else if (root_objectid || first_ino) { + root_objectid = 0; first_ino = 0; continue; } else { @@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) /* remove it from the rbtree */ first_ino = defrag->ino + 1; + root_objectid = defrag->root; rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); if (btrfs_fs_closing(fs_info)) -- cgit From 22ee6985de7d3e81ec0cef9c6ba01b45ad1bafeb Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 May 2012 16:57:49 -0400 Subject: Btrfs: check to see if the inode is in the log before fsyncing We have this check down in the actual logging code, but this is after we start a transaction and all that good stuff. So move the helper inode_in_log() out so we can call it in fsync() and avoid starting a transaction altogether and just exit if we've already fsync()'ed this file recently. You would notice this issue if you fsync()'ed a file over and over again until the transaction committed. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2e63cdc2b093..876cddd6b2f0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1552,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * syncing */ smp_mb(); - if (BTRFS_I(inode)->last_trans <= + if (btrfs_inode_in_log(inode, root->fs_info->generation) || + BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; mutex_unlock(&inode->i_mutex); -- cgit From e41f941a23115e84a8550b3d901a13a14b2edc2f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:46:47 -0400 Subject: Btrfs: move over to use ->update_time Btrfs had been doing it's own file_update_time so we could catch ENOSPC properly, so just update our btrfs_update_time to work with the new stuff and then we'll be fancy later. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc..974beb84ed61 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1404,7 +1404,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - err = btrfs_update_time(file); + err = file_update_time(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; -- cgit