From 66d7e7f09f77456fe68683247d77721032a00ee5 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 12 Sep 2011 15:26:38 +0200 Subject: Btrfs: mark delayed refs as for cow Add a for_cow parameter to add_delayed_*_ref and pass the appropriate value from every call site. The for_cow parameter will later on be used to determine if a ref will change anything with respect to qgroups. Delayed refs coming from relocation are always counted as for_cow, as they don't change subvol quota. Also pass in the fs_info for later use. btrfs_find_all_roots() will use this as an optimization, as changes that are for_cow will not change anything with respect to which root points to a certain leaf. Thus, we don't need to add the current sequence number to those delayed refs. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5ccec23984c..ea819386b864 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3139,7 +3139,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), - ino, extent_offset); + ino, extent_offset, 0); BUG_ON(ret); } -- cgit From 6bf7e080d5bcb0d399ee38ce3dabbfad64448192 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 1 Dec 2011 14:35:19 +0100 Subject: Btrfs: make sure we're not using obsolete code in btrfs_get_extent There's code in btrfs_get_extent that should never be used. This patch turns a WARN_ON(1) into a BUG(), hoping we can remove the transaction code from btrfs_get_extent soon. 
Signed-off-by: Jan Schmidt --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ea819386b864..603d740f0f1c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5022,7 +5022,7 @@ again: } flush_dcache_page(page); } else if (create && PageUptodate(page)) { - WARN_ON(1); + BUG(); if (!trans) { kunmap(page); free_extent_map(em); -- cgit From 7ad85bb76a61801362701b77c5cee5aa09f35369 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: do not use btrfs_end_transaction_throttle everywhere A user reported a problem where things like open with O_CREAT would take up to 30 seconds when he had nfs activity on the same mount. This is because all of our quick metadata operations, like create, symlink etc all do btrfs_end_transaction_throttle, which if the transaction is blocked will wait for the commit to complete before it returns. This adds a ridiculous amount of latency and isn't really needed. The normal btrfs_end_transaction will mark the transaction as blocked and wake the transaction kthread up if it thinks the transaction needs to end (this being in the running out of global reserve space scenario), and this is all that is really needed since we've already done everything we're going to do, we just need to return. This should help people with the latency they were seeing when using synchronous heavy workloads. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index acc4ff39ca4e..5f8ba210c0aa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2845,7 +2845,7 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, BUG_ON(!root->fs_info->enospc_unlink); root->fs_info->enospc_unlink = 0; } - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); } static int btrfs_unlink(struct inode *dir, struct dentry *dentry) @@ -3434,7 +3434,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) i_size_write(inode, newsize); btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); ret = btrfs_update_inode(trans, root, inode); - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); } else { /* @@ -4655,7 +4655,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); if (drop_inode) { inode_dec_link_count(inode); @@ -4723,7 +4723,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, } out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4782,7 +4782,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -4848,7 +4848,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); if 
(drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -6668,7 +6668,7 @@ end_trans: err = ret; nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, root); + ret = btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); } @@ -7075,7 +7075,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, btrfs_end_log_trans(root); } out_fail: - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); out_notrans: if (old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); @@ -7247,7 +7247,7 @@ out_unlock: if (!err) d_instantiate(dentry, inode); nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); iput(inode); -- cgit From f70a9a6b94af86fca069a7552ab672c31b457786 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: fix btrfsck error 400 when truncating a compressed Reproduce steps: # mkfs.btrfs /dev/sdb5 # mount /dev/sdb5 -o compress=lzo /mnt # dd if=/dev/zero of=/mnt/tmpfile bs=128K count=1 # sync # truncate -s 64K /mnt/tmpfile root 5 inode 257 errors 400 This is because of the wrong if condition, which is used to check if we should subtract the bytes of the dropped range from i_blocks/i_bytes of i-node or not. When we truncate a compressed extent, btrfs subtracts the bytes of the whole extent, which is wrong. We should subtract the real size that we truncate, no matter whether it is a compressed extent or not. Fix it. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f8ba210c0aa..946a7f1b3295 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3009,7 +3009,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; - int encoding; int ret; int err = 0; u64 ino = btrfs_ino(inode); @@ -3059,7 +3058,6 @@ search_again: leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); - encoding = 0; if (found_key.objectid != ino) break; @@ -3072,10 +3070,6 @@ search_again: fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(leaf, fi); - encoding = btrfs_file_extent_compression(leaf, fi); - encoding |= btrfs_file_extent_encryption(leaf, fi); - encoding |= btrfs_file_extent_other_encoding(leaf, fi); - if (extent_type != BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_num_bytes(leaf, fi); @@ -3103,7 +3097,7 @@ search_again: if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); - if (!del_item && !encoding) { + if (!del_item) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); extent_num_bytes = new_size - -- cgit From ec39e180fd3188c983c94603634bfcd019f42ae7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: release space on error in page_mkwrite If updating the inode gave us an ENOSPC we were just returning in page_mkwrite, which is a problem since we make our reservation right before trying to update the inode, so fix the out label so that we actually free our reservation. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 946a7f1b3295..85fd86ea9830 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6488,8 +6488,8 @@ out_unlock: if (!ret) return VM_FAULT_LOCKED; unlock_page(page); - btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); out: + btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); return ret; } -- cgit From 90290e19820e3323ce6b9c2888eeb68bf29c278b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 2 Dec 2011 15:44:12 -0500 Subject: Btrfs: protect orphan block rsv with spin_lock We've been seeing warnings coming out of the orphan commit stuff forever from ceph. Turns out it's because we're racing with checking if the orphan block reserve is set, because we clear it outside of the spin_lock. So leave the normal fastpath checks where they are, but take the spin_lock and _recheck_ to make sure we haven't had an orphan block rsv added in the meantime. Then clear the root's orphan block rsv and release the lock. With this patch a user said the warnings went away and they usually showed up pretty soon after he started ceph. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 85fd86ea9830..619742d37166 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1951,12 +1951,28 @@ enum btrfs_orphan_cleanup_state { void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + struct btrfs_block_rsv *block_rsv; int ret; if (!list_empty(&root->orphan_list) || root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) return; + spin_lock(&root->orphan_lock); + if (!list_empty(&root->orphan_list)) { + spin_unlock(&root->orphan_lock); + return; + } + + if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) { + spin_unlock(&root->orphan_lock); + return; + } + + block_rsv = root->orphan_block_rsv; + root->orphan_block_rsv = NULL; + spin_unlock(&root->orphan_lock); + if (root->orphan_item_inserted && btrfs_root_refs(&root->root_item) > 0) { ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, @@ -1965,10 +1981,9 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, root->orphan_item_inserted = 0; } - if (root->orphan_block_rsv) { - WARN_ON(root->orphan_block_rsv->size > 0); - btrfs_free_block_rsv(root, root->orphan_block_rsv); - root->orphan_block_rsv = NULL; + if (block_rsv) { + WARN_ON(block_rsv->size > 0); + btrfs_free_block_rsv(root, block_rsv); } } -- cgit From f248679e86fead40cc78e724c7181d6bec1a2046 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Jan 2012 12:09:22 -0500 Subject: Btrfs: add a delalloc mutex to inodes for delalloc reservations I was using i_mutex for this, but we're getting bogus lockdep warnings by doing that and there's no real way to get rid of those, so just stop using i_mutex to protect delalloc metadata reservations and use a delalloc mutex instead. 
This shouldn't be contended often at all, only if you are writing and mmap writing to the file at the same time. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 619742d37166..5977987abdb1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2239,14 +2239,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; - /* - * Need to hold the imutex for reservation purposes, not - * a huge deal here but I have a WARN_ON in - * btrfs_delalloc_reserve_space to catch offenders. - */ - mutex_lock(&inode->i_mutex); ret = btrfs_truncate(inode); - mutex_unlock(&inode->i_mutex); } else { nr_unlink++; } @@ -6411,10 +6404,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_start; u64 page_end; - /* Need this to keep space reservations serialized */ - mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); - mutex_unlock(&inode->i_mutex); if (!ret) ret = btrfs_update_time(vma->vm_file); if (ret) { @@ -6758,6 +6748,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) extent_io_tree_init(&ei->io_tree, &inode->i_data); extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); mutex_init(&ei->log_mutex); + mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->delalloc_inodes); -- cgit