Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--  fs/btrfs/delayed-inode.c | 245
1 file changed, 114 insertions(+), 131 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 19e4ad2f3f2e..86ec2edc05e8 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -18,10 +18,12 @@
*/
#include <linux/slab.h>
+#include <linux/iversion.h>
#include "delayed-inode.h"
#include "disk-io.h"
#include "transaction.h"
#include "ctree.h"
+#include "qgroup.h"
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
@@ -41,7 +43,7 @@ int __init btrfs_delayed_inode_init(void)
return 0;
}
-void btrfs_delayed_inode_exit(void)
+void __cold btrfs_delayed_inode_exit(void)
{
kmem_cache_destroy(delayed_node_cache);
}
@@ -87,6 +89,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
spin_lock(&root->inode_lock);
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+
if (node) {
if (btrfs_inode->delayed_node) {
refcount_inc(&node->refs); /* can be accessed */
@@ -94,9 +97,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
spin_unlock(&root->inode_lock);
return node;
}
- btrfs_inode->delayed_node = node;
- /* can be accessed and cached in the inode */
- refcount_add(2, &node->refs);
+
+ /*
+ * It's possible that we're racing into the middle of removing
+ * this node from the radix tree. In this case, the refcount
+ * was zero and it should never go back to one. Just return
+ * NULL like it was never in the radix at all; our release
+ * function is in the process of removing it.
+ *
+ * Some implementations of refcount_inc refuse to bump the
+ * refcount once it has hit zero. If we don't do this dance
+ * here, refcount_inc() may decide to just WARN_ONCE() instead
+ * of actually bumping the refcount.
+ *
+ * If this node is properly in the radix, we want to bump the
+ * refcount twice, once for the inode and once for this get
+ * operation.
+ */
+ if (refcount_inc_not_zero(&node->refs)) {
+ refcount_inc(&node->refs);
+ btrfs_inode->delayed_node = node;
+ } else {
+ node = NULL;
+ }
+
spin_unlock(&root->inode_lock);
return node;
}
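The key change in this hunk is switching from an unconditional refcount_add() to refcount_inc_not_zero(), so a node whose refcount has already dropped to zero (and is about to be removed from the radix tree by the release path) is never resurrected. As a minimal sketch of the general pattern, using illustrative names (lookup_node, node_tree, tree_lock) rather than the btrfs code itself:

	struct node *lookup_node(struct tree *t, u64 id)
	{
		struct node *n;

		spin_lock(&t->tree_lock);
		n = radix_tree_lookup(&t->node_tree, id);
		if (n && !refcount_inc_not_zero(&n->refs)) {
			/*
			 * The refcount already hit zero: the release path owns
			 * this node and will delete and free it, so behave as
			 * if the lookup had found nothing.
			 */
			n = NULL;
		}
		spin_unlock(&t->tree_lock);
		return n;
	}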
@@ -254,17 +278,18 @@ static void __btrfs_release_delayed_node(
mutex_unlock(&delayed_node->mutex);
if (refcount_dec_and_test(&delayed_node->refs)) {
- bool free = false;
struct btrfs_root *root = delayed_node->root;
+
spin_lock(&root->inode_lock);
- if (refcount_read(&delayed_node->refs) == 0) {
- radix_tree_delete(&root->delayed_nodes_tree,
- delayed_node->inode_id);
- free = true;
- }
+ /*
+ * Once our refcount goes to zero, nobody is allowed to bump it
+ * back up. We can delete it now.
+ */
+ ASSERT(refcount_read(&delayed_node->refs) == 0);
+ radix_tree_delete(&root->delayed_nodes_tree,
+ delayed_node->inode_id);
spin_unlock(&root->inode_lock);
- if (free)
- kmem_cache_free(delayed_node_cache, delayed_node);
+ kmem_cache_free(delayed_node_cache, delayed_node);
}
}
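The release side relies on the same invariant: because the lookup only takes references via refcount_inc_not_zero(), a refcount that has reached zero can never be bumped again, so the node can be deleted from the tree and freed unconditionally (hence the ASSERT replacing the old re-check). A matching sketch with the same illustrative names as above:

	void release_node(struct tree *t, struct node *n)
	{
		if (refcount_dec_and_test(&n->refs)) {
			spin_lock(&t->tree_lock);
			/* Nobody may revive a zero refcount, so deletion is safe. */
			radix_tree_delete(&t->node_tree, n->id);
			spin_unlock(&t->tree_lock);
			kfree(n);
		}
	}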
@@ -528,11 +553,12 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item(
}
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+ struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
struct btrfs_block_rsv *src_rsv;
struct btrfs_block_rsv *dst_rsv;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 num_bytes;
int ret;
@@ -554,15 +580,17 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
return ret;
}
-static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
+static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
struct btrfs_block_rsv *rsv;
+ struct btrfs_fs_info *fs_info = root->fs_info;
if (!item->bytes_reserved)
return;
rsv = &fs_info->delayed_block_rsv;
+ btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
@@ -581,36 +609,15 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
- bool release = false;
src_rsv = trans->block_rsv;
dst_rsv = &fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- /*
- * If our block_rsv is the delalloc block reserve then check and see if
- * we have our extra reservation for updating the inode. If not fall
- * through and try to reserve space quickly.
- *
- * We used to try and steal from the delalloc block rsv or the global
- * reserve, but we'd steal a full reservation, which isn't kind. We are
- * here through delalloc which means we've likely just cowed down close
- * to the leaf that contains the inode, so we would steal less just
- * doing the fallback inode update, so if we do end up having to steal
- * from the global block rsv we hopefully only steal one or two blocks
- * worth which is less likely to hurt us.
- */
- if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
- spin_lock(&inode->lock);
- if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &inode->runtime_flags))
- release = true;
- else
- src_rsv = NULL;
- spin_unlock(&inode->lock);
- }
-
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+ if (ret < 0)
+ return ret;
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
@@ -618,7 +625,7 @@ static int btrfs_delayed_inode_reserve_metadata(
* space.
*
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
- * we're accounted for.
+ * we always reserve enough to update the inode item.
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
@@ -630,8 +637,10 @@ static int btrfs_delayed_inode_reserve_metadata(
* EAGAIN to make us stop the transaction we have, so return
* ENOSPC instead so that btrfs_dirty_inode knows what to do.
*/
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
ret = -ENOSPC;
+ btrfs_qgroup_free_meta_prealloc(root, num_bytes);
+ }
if (!ret) {
node->bytes_reserved = num_bytes;
trace_btrfs_space_reservation(fs_info,
@@ -643,37 +652,18 @@ static int btrfs_delayed_inode_reserve_metadata(
}
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-
- /*
- * Migrate only takes a reservation, it doesn't touch the size of the
- * block_rsv. This is to simplify people who don't normally have things
- * migrated from their block rsv. If they go to release their
- * reservation, that will decrease the size as well, so if migrate
- * reduced size we'd end up with a negative size. But for the
- * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
- * but we could in fact do this reserve/migrate dance several times
- * between the time we did the original reservation and we'd clean it
- * up. So to take care of this, release the space for the meta
- * reservation here. I think it may be time for a documentation page on
- * how block rsvs. work.
- */
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_inode",
btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
}
- if (release) {
- trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), num_bytes, 0);
- btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
- }
-
return ret;
}
static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_node *node)
+ struct btrfs_delayed_node *node,
+ bool qgroup_free)
{
struct btrfs_block_rsv *rsv;
@@ -685,6 +675,12 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
node->inode_id, node->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, rsv,
node->bytes_reserved);
+ if (qgroup_free)
+ btrfs_qgroup_free_meta_prealloc(node->root,
+ node->bytes_reserved);
+ else
+ btrfs_qgroup_convert_reserved_meta(node->root,
+ node->bytes_reserved);
node->bytes_reserved = 0;
}
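Taken together, the inode reservation path now pre-reserves qgroup metadata space before touching the block rsv, frees that reservation again if the block rsv reservation or the inode update fails, and converts it otherwise. A condensed sketch of the flow, where reserve_block_rsv() and write_inode_item() are placeholders for the real reservation and commit steps:

	static int reserve_and_write_inode(struct btrfs_root *root, u64 num_bytes)
	{
		int ret;

		/* 1. Pre-reserve qgroup metadata space (prealloc type). */
		ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
		if (ret < 0)
			return ret;

		/* 2. Reserve block rsv space; undo the qgroup reservation on failure. */
		ret = reserve_block_rsv(root, num_bytes);	/* placeholder */
		if (ret) {
			btrfs_qgroup_free_meta_prealloc(root, num_bytes);
			return ret;
		}

		/* 3. On success the prealloc reservation is converted, not freed. */
		ret = write_inode_item(root);			/* placeholder */
		if (ret < 0)
			btrfs_qgroup_free_meta_prealloc(root, num_bytes);
		else
			btrfs_qgroup_convert_reserved_meta(root, num_bytes);
		return ret;
	}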
@@ -786,7 +782,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
curr->data_len);
slot++;
- btrfs_delayed_item_release_metadata(fs_info, curr);
+ btrfs_delayed_item_release_metadata(root, curr);
list_del(&curr->tree_list);
btrfs_release_delayed_item(curr);
@@ -808,7 +804,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *delayed_item)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
char *ptr;
int ret;
@@ -826,7 +821,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
delayed_item->data_len);
btrfs_mark_buffer_dirty(leaf);
- btrfs_delayed_item_release_metadata(fs_info, delayed_item);
+ btrfs_delayed_item_release_metadata(root, delayed_item);
return 0;
}
@@ -878,7 +873,6 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *item)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_item *curr, *next;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -928,7 +922,7 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
goto out;
list_for_each_entry_safe(curr, next, &head, tree_list) {
- btrfs_delayed_item_release_metadata(fs_info, curr);
+ btrfs_delayed_item_release_metadata(root, curr);
list_del(&curr->tree_list);
btrfs_release_delayed_item(curr);
}
@@ -1071,7 +1065,7 @@ out:
no_iref:
btrfs_release_path(path);
err_out:
- btrfs_delayed_inode_release_metadata(fs_info, node);
+ btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
btrfs_release_delayed_inode(node);
return ret;
@@ -1135,9 +1129,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
* Returns < 0 on error and returns with an aborted transaction with any
* outstanding delayed items cleaned up.
*/
-static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, int nr)
+static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
@@ -1182,16 +1176,14 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
{
- return __btrfs_run_delayed_items(trans, fs_info, -1);
+ return __btrfs_run_delayed_items(trans, -1);
}
-int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, int nr)
+int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
{
- return __btrfs_run_delayed_items(trans, fs_info, nr);
+ return __btrfs_run_delayed_items(trans, nr);
}
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
@@ -1323,40 +1315,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
if (!path)
goto out;
-again:
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
- goto free_path;
+ do {
+ if (atomic_read(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND / 2)
+ break;
- delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
- if (!delayed_node)
- goto free_path;
+ delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+ if (!delayed_node)
+ break;
- path->leave_spinning = 1;
- root = delayed_node->root;
+ path->leave_spinning = 1;
+ root = delayed_node->root;
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto release_path;
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ btrfs_release_path(path);
+ btrfs_release_prepared_delayed_node(delayed_node);
+ total_done++;
+ continue;
+ }
- block_rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->delayed_block_rsv;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->delayed_block_rsv;
- __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
+ __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
- trans->block_rsv = block_rsv;
- btrfs_end_transaction(trans);
- btrfs_btree_balance_dirty_nodelay(root->fs_info);
+ trans->block_rsv = block_rsv;
+ btrfs_end_transaction(trans);
+ btrfs_btree_balance_dirty_nodelay(root->fs_info);
-release_path:
- btrfs_release_path(path);
- total_done++;
+ btrfs_release_path(path);
+ btrfs_release_prepared_delayed_node(delayed_node);
+ total_done++;
- btrfs_release_prepared_delayed_node(delayed_node);
- if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
- total_done < async_work->nr)
- goto again;
+ } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
+ || total_done < async_work->nr);
-free_path:
btrfs_free_path(path);
out:
wake_up(&delayed_root->wait);
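The async worker's goto-based loop becomes a do/while: a failed btrfs_join_transaction() now releases the path and the prepared node and moves on to the next node instead of jumping straight to the cleanup labels. Stripped of the transaction details, the new control flow looks roughly like this (process_one_node() is a placeholder for the join/commit/balance steps):

	do {
		if (atomic_read(&delayed_root->items) <
		    BTRFS_DELAYED_BACKGROUND / 2)
			break;

		delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
		if (!delayed_node)
			break;

		process_one_node(delayed_node);		/* placeholder */
		btrfs_release_prepared_delayed_node(delayed_node);
		total_done++;
	} while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
		 total_done < async_work->nr);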
@@ -1369,10 +1363,6 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
{
struct btrfs_async_delayed_work *async_work;
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
- btrfs_workqueue_normal_congested(fs_info->delayed_workers))
- return 0;
-
async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
if (!async_work)
return -ENOMEM;
@@ -1408,7 +1398,8 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
{
struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
+ if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
+ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return;
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
@@ -1464,7 +1455,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
btrfs_set_stack_dir_type(dir_item, type);
memcpy((char *)(dir_item + 1), name, name_len);
- ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
+ ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible
@@ -1501,7 +1492,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
return 1;
}
- btrfs_delayed_item_release_metadata(fs_info, item);
+ btrfs_delayed_item_release_metadata(node->root, item);
btrfs_release_delayed_item(item);
mutex_unlock(&node->mutex);
return 0;
@@ -1536,7 +1527,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
item->key = item_key;
- ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
+ ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible.
@@ -1654,28 +1645,18 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index)
{
- struct btrfs_delayed_item *curr, *next;
- int ret;
-
- if (list_empty(del_list))
- return 0;
+ struct btrfs_delayed_item *curr;
+ int ret = 0;
- list_for_each_entry_safe(curr, next, del_list, readdir_list) {
+ list_for_each_entry(curr, del_list, readdir_list) {
if (curr->key.offset > index)
break;
-
- list_del(&curr->readdir_list);
- ret = (curr->key.offset == index);
-
- if (refcount_dec_and_test(&curr->refs))
- kfree(curr);
-
- if (ret)
- return 1;
- else
- continue;
+ if (curr->key.offset == index) {
+ ret = 1;
+ break;
+ }
}
- return 0;
+ return ret;
}
/*
@@ -1744,7 +1725,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
btrfs_set_stack_inode_generation(inode_item,
BTRFS_I(inode)->generation);
- btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
+ btrfs_set_stack_inode_sequence(inode_item,
+ inode_peek_iversion(inode));
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
@@ -1798,7 +1780,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
- inode->i_version = btrfs_stack_inode_sequence(inode_item);
+ inode_set_iversion_queried(inode,
+ btrfs_stack_inode_sequence(inode_item));
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
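The direct i_version accesses are replaced by the helpers from <linux/iversion.h> (which is why the include is added at the top of this patch): inode_peek_iversion() reads the counter without marking it as queried, and inode_set_iversion_queried() restores a value loaded from disk while recording that it has been seen. A small sketch of how the two helpers pair up around the stack inode item:

	static void store_and_reload_sequence(struct inode *inode,
					      struct btrfs_inode_item *item)
	{
		/* Write-out: read the counter without flagging it as queried. */
		btrfs_set_stack_inode_sequence(item, inode_peek_iversion(inode));

		/* Read-in: restore the on-disk value, marked as already queried. */
		inode_set_iversion_queried(inode,
					   btrfs_stack_inode_sequence(item));
	}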
@@ -1909,7 +1892,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
mutex_lock(&delayed_node->mutex);
curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
while (curr_item) {
- btrfs_delayed_item_release_metadata(fs_info, curr_item);
+ btrfs_delayed_item_release_metadata(root, curr_item);
prev_item = curr_item;
curr_item = __btrfs_next_delayed_item(prev_item);
btrfs_release_delayed_item(prev_item);
@@ -1917,7 +1900,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
while (curr_item) {
- btrfs_delayed_item_release_metadata(fs_info, curr_item);
+ btrfs_delayed_item_release_metadata(root, curr_item);
prev_item = curr_item;
curr_item = __btrfs_next_delayed_item(prev_item);
btrfs_release_delayed_item(prev_item);
@@ -1927,7 +1910,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
btrfs_release_delayed_iref(delayed_node);
if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
- btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
+ btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
btrfs_release_delayed_inode(delayed_node);
}
mutex_unlock(&delayed_node->mutex);