Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--  fs/btrfs/delayed-inode.c | 810
1 file changed, 445 insertions(+), 365 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0095c6e4c3d1..ce6e9f8812e0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -28,11 +28,7 @@ static struct kmem_cache *delayed_node_cache; int __init btrfs_delayed_inode_init(void) { - delayed_node_cache = kmem_cache_create("btrfs_delayed_node", - sizeof(struct btrfs_delayed_node), - 0, - SLAB_MEM_SPREAD, - NULL); + delayed_node_cache = KMEM_CACHE(btrfs_delayed_node, 0); if (!delayed_node_cache) return -ENOMEM; return 0; @@ -43,6 +39,17 @@ void __cold btrfs_delayed_inode_exit(void) kmem_cache_destroy(delayed_node_cache); } +void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root) +{ + atomic_set(&delayed_root->items, 0); + atomic_set(&delayed_root->items_seq, 0); + delayed_root->nodes = 0; + spin_lock_init(&delayed_root->lock); + init_waitqueue_head(&delayed_root->wait); + INIT_LIST_HEAD(&delayed_root->node_list); + INIT_LIST_HEAD(&delayed_root->prepare_list); +} + static inline void btrfs_init_delayed_node( struct btrfs_delayed_node *delayed_node, struct btrfs_root *root, u64 inode_id) @@ -50,6 +57,7 @@ static inline void btrfs_init_delayed_node( delayed_node->root = root; delayed_node->inode_id = inode_id; refcount_set(&delayed_node->refs, 0); + btrfs_delayed_node_ref_tracker_dir_init(delayed_node); delayed_node->ins_root = RB_ROOT_CACHED; delayed_node->del_root = RB_ROOT_CACHED; mutex_init(&delayed_node->mutex); @@ -58,7 +66,8 @@ static inline void btrfs_init_delayed_node( } static struct btrfs_delayed_node *btrfs_get_delayed_node( - struct btrfs_inode *btrfs_inode) + struct btrfs_inode *btrfs_inode, + struct btrfs_ref_tracker *tracker) { struct btrfs_root *root = btrfs_inode->root; u64 ino = btrfs_ino(btrfs_inode); @@ -67,25 +76,27 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( node = READ_ONCE(btrfs_inode->delayed_node); if (node) { refcount_inc(&node->refs); + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_NOFS); return node; } - spin_lock(&root->inode_lock); - node = radix_tree_lookup(&root->delayed_nodes_tree, ino); + xa_lock(&root->delayed_nodes); + node = xa_load(&root->delayed_nodes, ino); if (node) { if (btrfs_inode->delayed_node) { refcount_inc(&node->refs); /* can be accessed */ + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); BUG_ON(btrfs_inode->delayed_node != node); - spin_unlock(&root->inode_lock); + xa_unlock(&root->delayed_nodes); return node; } /* * It's possible that we're racing into the middle of removing - * this node from the radix tree. In this case, the refcount + * this node from the xarray. In this case, the refcount * was zero and it should never go back to one. Just return - * NULL like it was never in the radix at all; our release + * NULL like it was never in the xarray at all; our release * function is in the process of removing it. * * Some implementations of refcount_inc refuse to bump the @@ -93,36 +104,46 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( * here, refcount_inc() may decide to just WARN_ONCE() instead * of actually bumping the refcount. * - * If this node is properly in the radix, we want to bump the + * If this node is properly in the xarray, we want to bump the * refcount twice, once for the inode and once for this get * operation. 
*/ if (refcount_inc_not_zero(&node->refs)) { refcount_inc(&node->refs); + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); + btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, + GFP_ATOMIC); btrfs_inode->delayed_node = node; } else { node = NULL; } - spin_unlock(&root->inode_lock); + xa_unlock(&root->delayed_nodes); return node; } - spin_unlock(&root->inode_lock); + xa_unlock(&root->delayed_nodes); return NULL; } -/* Will return either the node or PTR_ERR(-ENOMEM) */ +/* + * Look up an existing delayed node associated with @btrfs_inode or create a new + * one and insert it to the delayed nodes of the root. + * + * Return the delayed node, or error pointer on failure. + */ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( - struct btrfs_inode *btrfs_inode) + struct btrfs_inode *btrfs_inode, + struct btrfs_ref_tracker *tracker) { struct btrfs_delayed_node *node; struct btrfs_root *root = btrfs_inode->root; u64 ino = btrfs_ino(btrfs_inode); int ret; + void *ptr; again: - node = btrfs_get_delayed_node(btrfs_inode); + node = btrfs_get_delayed_node(btrfs_inode, tracker); if (node) return node; @@ -131,26 +152,35 @@ again: return ERR_PTR(-ENOMEM); btrfs_init_delayed_node(node, root, ino); - /* cached in the btrfs inode and can be accessed */ - refcount_set(&node->refs, 2); - - ret = radix_tree_preload(GFP_NOFS); - if (ret) { + /* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */ + ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS); + if (ret == -ENOMEM) { + btrfs_delayed_node_ref_tracker_dir_exit(node); kmem_cache_free(delayed_node_cache, node); - return ERR_PTR(ret); + return ERR_PTR(-ENOMEM); } - - spin_lock(&root->inode_lock); - ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); - if (ret == -EEXIST) { - spin_unlock(&root->inode_lock); + xa_lock(&root->delayed_nodes); + ptr = xa_load(&root->delayed_nodes, ino); + if (ptr) { + /* Somebody inserted it, go back and read it. */ + xa_unlock(&root->delayed_nodes); + btrfs_delayed_node_ref_tracker_dir_exit(node); kmem_cache_free(delayed_node_cache, node); - radix_tree_preload_end(); + node = NULL; goto again; } + ptr = __xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC); + ASSERT(xa_err(ptr) != -EINVAL); + ASSERT(xa_err(ptr) != -ENOMEM); + ASSERT(ptr == NULL); + + /* Cached in the inode and can be accessed. 
*/ + refcount_set(&node->refs, 2); + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); + btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, GFP_ATOMIC); + btrfs_inode->delayed_node = node; - spin_unlock(&root->inode_lock); - radix_tree_preload_end(); + xa_unlock(&root->delayed_nodes); return node; } @@ -174,6 +204,8 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root, list_add_tail(&node->n_list, &root->node_list); list_add_tail(&node->p_list, &root->prepare_list); refcount_inc(&node->refs); /* inserted into list */ + btrfs_delayed_node_ref_tracker_alloc(node, &node->node_list_tracker, + GFP_ATOMIC); root->nodes++; set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags); } @@ -187,6 +219,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root, spin_lock(&root->lock); if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) { root->nodes--; + btrfs_delayed_node_ref_tracker_free(node, &node->node_list_tracker); refcount_dec(&node->refs); /* not in the list */ list_del_init(&node->n_list); if (!list_empty(&node->p_list)) @@ -197,26 +230,26 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root, } static struct btrfs_delayed_node *btrfs_first_delayed_node( - struct btrfs_delayed_root *delayed_root) + struct btrfs_delayed_root *delayed_root, + struct btrfs_ref_tracker *tracker) { - struct list_head *p; - struct btrfs_delayed_node *node = NULL; + struct btrfs_delayed_node *node; spin_lock(&delayed_root->lock); - if (list_empty(&delayed_root->node_list)) - goto out; - - p = delayed_root->node_list.next; - node = list_entry(p, struct btrfs_delayed_node, n_list); - refcount_inc(&node->refs); -out: + node = list_first_entry_or_null(&delayed_root->node_list, + struct btrfs_delayed_node, n_list); + if (node) { + refcount_inc(&node->refs); + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); + } spin_unlock(&delayed_root->lock); return node; } static struct btrfs_delayed_node *btrfs_next_delayed_node( - struct btrfs_delayed_node *node) + struct btrfs_delayed_node *node, + struct btrfs_ref_tracker *tracker) { struct btrfs_delayed_root *delayed_root; struct list_head *p; @@ -236,6 +269,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node( next = list_entry(p, struct btrfs_delayed_node, n_list); refcount_inc(&next->refs); + btrfs_delayed_node_ref_tracker_alloc(next, tracker, GFP_ATOMIC); out: spin_unlock(&delayed_root->lock); @@ -244,7 +278,7 @@ out: static void __btrfs_release_delayed_node( struct btrfs_delayed_node *delayed_node, - int mod) + int mod, struct btrfs_ref_tracker *tracker) { struct btrfs_delayed_root *delayed_root; @@ -260,51 +294,51 @@ static void __btrfs_release_delayed_node( btrfs_dequeue_delayed_node(delayed_root, delayed_node); mutex_unlock(&delayed_node->mutex); + btrfs_delayed_node_ref_tracker_free(delayed_node, tracker); if (refcount_dec_and_test(&delayed_node->refs)) { struct btrfs_root *root = delayed_node->root; - spin_lock(&root->inode_lock); + xa_erase(&root->delayed_nodes, delayed_node->inode_id); /* * Once our refcount goes to zero, nobody is allowed to bump it * back up. We can delete it now. 
*/ ASSERT(refcount_read(&delayed_node->refs) == 0); - radix_tree_delete(&root->delayed_nodes_tree, - delayed_node->inode_id); - spin_unlock(&root->inode_lock); + btrfs_delayed_node_ref_tracker_dir_exit(delayed_node); kmem_cache_free(delayed_node_cache, delayed_node); } } -static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node) +static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node, + struct btrfs_ref_tracker *tracker) { - __btrfs_release_delayed_node(node, 0); + __btrfs_release_delayed_node(node, 0, tracker); } static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( - struct btrfs_delayed_root *delayed_root) + struct btrfs_delayed_root *delayed_root, + struct btrfs_ref_tracker *tracker) { - struct list_head *p; - struct btrfs_delayed_node *node = NULL; + struct btrfs_delayed_node *node; spin_lock(&delayed_root->lock); - if (list_empty(&delayed_root->prepare_list)) - goto out; - - p = delayed_root->prepare_list.next; - list_del_init(p); - node = list_entry(p, struct btrfs_delayed_node, p_list); - refcount_inc(&node->refs); -out: + node = list_first_entry_or_null(&delayed_root->prepare_list, + struct btrfs_delayed_node, p_list); + if (node) { + list_del_init(&node->p_list); + refcount_inc(&node->refs); + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); + } spin_unlock(&delayed_root->lock); return node; } static inline void btrfs_release_prepared_delayed_node( - struct btrfs_delayed_node *node) + struct btrfs_delayed_node *node, + struct btrfs_ref_tracker *tracker) { - __btrfs_release_delayed_node(node, 1); + __btrfs_release_delayed_node(node, 1, tracker); } static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, @@ -313,7 +347,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, { struct btrfs_delayed_item *item; - item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); + item = kmalloc(struct_size(item, data, data_len), GFP_NOFS); if (item) { item->data_len = data_len; item->type = type; @@ -327,8 +361,23 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, return item; } +static int delayed_item_index_cmp(const void *key, const struct rb_node *node) +{ + const u64 *index = key; + const struct btrfs_delayed_item *delayed_item = rb_entry(node, + struct btrfs_delayed_item, rb_node); + + if (delayed_item->index < *index) + return 1; + else if (delayed_item->index > *index) + return -1; + + return 0; +} + /* - * __btrfs_lookup_delayed_item - look up the delayed item by key + * Look up the delayed item by key. 
+ * * @delayed_node: pointer to the delayed node * @index: the dir index value to lookup (offset of a dir index key) * @@ -339,57 +388,35 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item( struct rb_root *root, u64 index) { - struct rb_node *node = root->rb_node; - struct btrfs_delayed_item *delayed_item = NULL; + struct rb_node *node; - while (node) { - delayed_item = rb_entry(node, struct btrfs_delayed_item, - rb_node); - if (delayed_item->index < index) - node = node->rb_right; - else if (delayed_item->index > index) - node = node->rb_left; - else - return delayed_item; - } + node = rb_find(&index, root, delayed_item_index_cmp); + return rb_entry_safe(node, struct btrfs_delayed_item, rb_node); +} - return NULL; +static int btrfs_delayed_item_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct btrfs_delayed_item *new_item = + rb_entry(new, struct btrfs_delayed_item, rb_node); + + return delayed_item_index_cmp(&new_item->index, exist); } static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, struct btrfs_delayed_item *ins) { - struct rb_node **p, *node; - struct rb_node *parent_node = NULL; struct rb_root_cached *root; - struct btrfs_delayed_item *item; - bool leftmost = true; + struct rb_node *exist; if (ins->type == BTRFS_DELAYED_INSERTION_ITEM) root = &delayed_node->ins_root; else root = &delayed_node->del_root; - p = &root->rb_root.rb_node; - node = &ins->rb_node; - - while (*p) { - parent_node = *p; - item = rb_entry(parent_node, struct btrfs_delayed_item, - rb_node); - - if (item->index < ins->index) { - p = &(*p)->rb_right; - leftmost = false; - } else if (item->index > ins->index) { - p = &(*p)->rb_left; - } else { - return -EEXIST; - } - } - - rb_link_node(node, parent_node, p); - rb_insert_color_cached(node, root, leftmost); + exist = rb_find_add_cached(&ins->rb_node, root, btrfs_delayed_item_cmp); + if (exist) + return -EEXIST; if (ins->type == BTRFS_DELAYED_INSERTION_ITEM && ins->index >= delayed_node->index_cnt) @@ -412,6 +439,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root) static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) { + struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node; struct rb_root_cached *root; struct btrfs_delayed_root *delayed_root; @@ -419,18 +447,19 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) if (RB_EMPTY_NODE(&delayed_item->rb_node)) return; - delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root; + /* If it's in a rbtree, then we need to have delayed node locked. 
*/ + lockdep_assert_held(&delayed_node->mutex); - BUG_ON(!delayed_root); + delayed_root = delayed_node->root->fs_info->delayed_root; if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM) - root = &delayed_item->delayed_node->ins_root; + root = &delayed_node->ins_root; else - root = &delayed_item->delayed_node->del_root; + root = &delayed_node->del_root; rb_erase_cached(&delayed_item->rb_node, root); RB_CLEAR_NODE(&delayed_item->rb_node); - delayed_item->delayed_node->count--; + delayed_node->count--; finish_one_item(delayed_root); } @@ -447,40 +476,25 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( struct btrfs_delayed_node *delayed_node) { - struct rb_node *p; - struct btrfs_delayed_item *item = NULL; - - p = rb_first_cached(&delayed_node->ins_root); - if (p) - item = rb_entry(p, struct btrfs_delayed_item, rb_node); + struct rb_node *p = rb_first_cached(&delayed_node->ins_root); - return item; + return rb_entry_safe(p, struct btrfs_delayed_item, rb_node); } static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( struct btrfs_delayed_node *delayed_node) { - struct rb_node *p; - struct btrfs_delayed_item *item = NULL; - - p = rb_first_cached(&delayed_node->del_root); - if (p) - item = rb_entry(p, struct btrfs_delayed_item, rb_node); + struct rb_node *p = rb_first_cached(&delayed_node->del_root); - return item; + return rb_entry_safe(p, struct btrfs_delayed_item, rb_node); } static struct btrfs_delayed_item *__btrfs_next_delayed_item( struct btrfs_delayed_item *item) { - struct rb_node *p; - struct btrfs_delayed_item *next = NULL; + struct rb_node *p = rb_next(&item->rb_node); - p = rb_next(&item->rb_node); - if (p) - next = rb_entry(p, struct btrfs_delayed_item, rb_node); - - return next; + return rb_entry_safe(p, struct btrfs_delayed_item, rb_node); } static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, @@ -513,7 +527,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, /* * For insertions we track reserved metadata space by accounting * for the number of leaves that will be used, based on the delayed - * node's index_items_size field. + * node's curr_index_batch_size and index_item_leaves fields. 
*/ if (item->type == BTRFS_DELAYED_DELETION_ITEM) item->bytes_reserved = num_bytes; @@ -654,7 +668,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, struct btrfs_key first_key; const u32 first_data_size = first_item->data_len; int total_size; - char *ins_data = NULL; + char AUTO_KFREE(ins_data); int ret; bool continuous_keys_only = false; @@ -724,12 +738,10 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, u32 *ins_sizes; int i = 0; - ins_data = kmalloc(batch.nr * sizeof(u32) + - batch.nr * sizeof(struct btrfs_key), GFP_NOFS); - if (!ins_data) { - ret = -ENOMEM; - goto out; - } + ins_data = kmalloc_array(batch.nr, + sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS); + if (!ins_data) + return -ENOMEM; ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32)); batch.keys = ins_keys; @@ -745,7 +757,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_items(trans, root, path, &batch); if (ret) - goto out; + return ret; list_for_each_entry(curr, &item_list, tree_list) { char *data_ptr; @@ -800,9 +812,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, list_del(&curr->tree_list); btrfs_release_delayed_item(curr); } -out: - kfree(ins_data); - return ret; + + return 0; } static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans, @@ -971,7 +982,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) if (delayed_node && test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { - BUG_ON(!delayed_node->root); + ASSERT(delayed_node->root); clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags); delayed_node->count--; @@ -1018,27 +1029,55 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, ret = btrfs_lookup_inode(trans, root, path, &key, mod); if (ret > 0) ret = -ENOENT; - if (ret < 0) + if (ret < 0) { + /* + * If we fail to update the delayed inode we need to abort the + * transaction, because we could leave the inode with the + * improper counts behind. + */ + if (unlikely(ret != -ENOENT)) + btrfs_abort_transaction(trans, ret); goto out; + } leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item, sizeof(struct btrfs_inode_item)); - btrfs_mark_buffer_dirty(leaf); if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) goto out; - path->slots[0]++; - if (path->slots[0] >= btrfs_header_nritems(leaf)) - goto search; -again: + /* + * Now we're going to delete the INODE_REF/EXTREF, which should be the + * only one ref left. Check if the next item is an INODE_REF/EXTREF. + * + * But if we're the last item already, release and search for the last + * INODE_REF/EXTREF. 
+ */ + if (path->slots[0] + 1 >= btrfs_header_nritems(leaf)) { + key.objectid = node->inode_id; + key.type = BTRFS_INODE_EXTREF_KEY; + key.offset = (u64)-1; + + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (unlikely(ret < 0)) { + btrfs_abort_transaction(trans, ret); + goto err_out; + } + ASSERT(ret > 0); + ASSERT(path->slots[0] > 0); + ret = 0; + path->slots[0]--; + leaf = path->nodes[0]; + } else { + path->slots[0]++; + } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != node->inode_id) goto out; - if (key.type != BTRFS_INODE_REF_KEY && key.type != BTRFS_INODE_EXTREF_KEY) goto out; @@ -1048,39 +1087,16 @@ again: * so there is only one iref. The case that several irefs are * in the same item doesn't exist. */ - btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, path); + if (ret < 0) + btrfs_abort_transaction(trans, ret); out: btrfs_release_delayed_iref(node); btrfs_release_path(path); err_out: btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0)); btrfs_release_delayed_inode(node); - - /* - * If we fail to update the delayed inode we need to abort the - * transaction, because we could leave the inode with the improper - * counts behind. - */ - if (ret && ret != -ENOENT) - btrfs_abort_transaction(trans, ret); - return ret; - -search: - btrfs_release_path(path); - - key.type = BTRFS_INODE_EXTREF_KEY; - key.offset = -1; - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto err_out; - ASSERT(ret); - - ret = 0; - leaf = path->nodes[0]; - path->slots[0]--; - goto again; } static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, @@ -1116,6 +1132,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_record_root_in_trans(trans, node->root); + if (ret) + return ret; ret = btrfs_update_delayed_inode(trans, node->root, path, node); return ret; } @@ -1131,6 +1150,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr) struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_root *delayed_root; struct btrfs_delayed_node *curr_node, *prev_node; + struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker; struct btrfs_path *path; struct btrfs_block_rsv *block_rsv; int ret = 0; @@ -1148,25 +1168,39 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr) delayed_root = fs_info->delayed_root; - curr_node = btrfs_first_delayed_node(delayed_root); + curr_node = btrfs_first_delayed_node(delayed_root, &curr_delayed_node_tracker); while (curr_node && (!count || nr--)) { ret = __btrfs_commit_inode_delayed_items(trans, path, curr_node); - if (ret) { - btrfs_release_delayed_node(curr_node); - curr_node = NULL; + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); break; } prev_node = curr_node; - curr_node = btrfs_next_delayed_node(curr_node); - btrfs_release_delayed_node(prev_node); + prev_delayed_node_tracker = curr_delayed_node_tracker; + curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker); + /* + * See the comment below about releasing path before releasing + * node. If the commit of delayed items was successful the path + * should always be released, but in case of an error, it may + * point to locked extent buffers (a leaf at the very least). 
+ */ + ASSERT(path->nodes[0] == NULL); + btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker); } - if (curr_node) - btrfs_release_delayed_node(curr_node); + /* + * Release the path to avoid a potential deadlock and lockdep splat when + * releasing the delayed node, as that requires taking the delayed node's + * mutex. If another task starts running delayed items before we take + * the mutex, it will first lock the mutex and then it may try to lock + * the same btree path (leaf). + */ btrfs_free_path(path); + + if (curr_node) + btrfs_release_delayed_node(curr_node, &curr_delayed_node_tracker); trans->block_rsv = block_rsv; return ret; @@ -1185,8 +1219,10 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr) int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); - struct btrfs_path *path; + struct btrfs_ref_tracker delayed_node_tracker; + struct btrfs_delayed_node *delayed_node = + btrfs_get_delayed_node(inode, &delayed_node_tracker); + BTRFS_PATH_AUTO_FREE(path); struct btrfs_block_rsv *block_rsv; int ret; @@ -1196,14 +1232,14 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); if (!delayed_node->count) { mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return 0; } mutex_unlock(&delayed_node->mutex); path = btrfs_alloc_path(); if (!path) { - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return -ENOMEM; } @@ -1212,8 +1248,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node); - btrfs_release_delayed_node(delayed_node); - btrfs_free_path(path); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); trans->block_rsv = block_rsv; return ret; @@ -1223,18 +1258,20 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_trans_handle *trans; - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); + struct btrfs_ref_tracker delayed_node_tracker; + struct btrfs_delayed_node *delayed_node; struct btrfs_path *path; struct btrfs_block_rsv *block_rsv; int ret; + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!delayed_node) return 0; mutex_lock(&delayed_node->mutex); if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return 0; } mutex_unlock(&delayed_node->mutex); @@ -1268,7 +1305,7 @@ trans_out: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); out: - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return ret; } @@ -1282,7 +1319,8 @@ void btrfs_remove_delayed_node(struct btrfs_inode *inode) return; inode->delayed_node = NULL; - btrfs_release_delayed_node(delayed_node); + + btrfs_release_delayed_node(delayed_node, &delayed_node->inode_cache_tracker); } struct btrfs_async_delayed_work { @@ -1298,6 +1336,7 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) struct btrfs_trans_handle *trans; struct btrfs_path *path; struct btrfs_delayed_node *delayed_node = NULL; + 
struct btrfs_ref_tracker delayed_node_tracker; struct btrfs_root *root; struct btrfs_block_rsv *block_rsv; int total_done = 0; @@ -1314,7 +1353,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) BTRFS_DELAYED_BACKGROUND / 2) break; - delayed_node = btrfs_first_prepared_delayed_node(delayed_root); + delayed_node = btrfs_first_prepared_delayed_node(delayed_root, + &delayed_node_tracker); if (!delayed_node) break; @@ -1323,7 +1363,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { btrfs_release_path(path); - btrfs_release_prepared_delayed_node(delayed_node); + btrfs_release_prepared_delayed_node(delayed_node, + &delayed_node_tracker); total_done++; continue; } @@ -1338,7 +1379,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) btrfs_btree_balance_dirty_nodelay(root->fs_info); btrfs_release_path(path); - btrfs_release_prepared_delayed_node(delayed_node); + btrfs_release_prepared_delayed_node(delayed_node, + &delayed_node_tracker); total_done++; } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) @@ -1361,8 +1403,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, return -ENOMEM; async_work->delayed_root = delayed_root; - btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL, - NULL); + btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL); async_work->nr = nr; btrfs_queue_work(fs_info->delayed_workers, &async_work->work); @@ -1371,20 +1412,28 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info) { - WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root)); + struct btrfs_ref_tracker delayed_node_tracker; + struct btrfs_delayed_node *node; + + node = btrfs_first_delayed_node( fs_info->delayed_root, &delayed_node_tracker); + if (WARN_ON(node)) { + btrfs_delayed_node_ref_tracker_free(node, + &delayed_node_tracker); + refcount_dec(&node->refs); + } } -static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) +static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) { int val = atomic_read(&delayed_root->items_seq); if (val < seq || val >= seq + BTRFS_DELAYED_BATCH) - return 1; + return true; if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) - return 1; + return true; - return 0; + return false; } void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) @@ -1413,23 +1462,46 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH); } -/* Will return 0 or -ENOMEM */ +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); + + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) + return; + + /* + * Adding the new dir index item does not require touching another + * leaf, so we can release 1 unit of metadata that was previously + * reserved when starting the transaction. This applies only to + * the case where we had a transaction start and excludes the + * transaction join case (when replaying log trees). 
+ */ + trace_btrfs_space_reservation(fs_info, "transaction", + trans->transid, bytes, 0); + btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); + ASSERT(trans->bytes_reserved >= bytes); + trans->bytes_reserved -= bytes; +} + +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, const char *name, int name_len, struct btrfs_inode *dir, - struct btrfs_disk_key *disk_key, u8 flags, + const struct btrfs_disk_key *disk_key, u8 flags, u64 index) { struct btrfs_fs_info *fs_info = trans->fs_info; const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info); struct btrfs_delayed_node *delayed_node; + struct btrfs_ref_tracker delayed_node_tracker; struct btrfs_delayed_item *delayed_item; struct btrfs_dir_item *dir_item; bool reserve_leaf_space; u32 data_len; int ret; - delayed_node = btrfs_get_or_create_delayed_node(dir); + delayed_node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); @@ -1455,6 +1527,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); + /* + * First attempt to insert the delayed item. This is to make the error + * handling path simpler in case we fail (-EEXIST). There's no risk of + * any other task coming in and running the delayed item before we do + * the metadata space reservation below, because we are holding the + * delayed node's mutex and that mutex must also be locked before the + * node's delayed items can be run. + */ + ret = __btrfs_add_delayed_item(delayed_node, delayed_item); + if (unlikely(ret)) { + btrfs_err(trans->fs_info, +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", + name_len, name, index, btrfs_root_id(delayed_node->root), + delayed_node->inode_id, dir->index_cnt, + delayed_node->index_cnt, ret); + btrfs_release_delayed_item(delayed_item); + btrfs_release_dir_index_item_space(trans); + mutex_unlock(&delayed_node->mutex); + goto release_node; + } + if (delayed_node->index_item_leaves == 0 || delayed_node->curr_index_batch_size + data_len > leaf_data_size) { delayed_node->curr_index_batch_size = data_len; @@ -1472,47 +1565,24 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, * impossible. */ if (WARN_ON(ret)) { - mutex_unlock(&delayed_node->mutex); btrfs_release_delayed_item(delayed_item); + mutex_unlock(&delayed_node->mutex); goto release_node; } delayed_node->index_item_leaves++; - } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { - const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); - - /* - * Adding the new dir index item does not require touching another - * leaf, so we can release 1 unit of metadata that was previously - * reserved when starting the transaction. This applies only to - * the case where we had a transaction start and excludes the - * transaction join case (when replaying log trees). 
- */ - trace_btrfs_space_reservation(fs_info, "transaction", - trans->transid, bytes, 0); - btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); - ASSERT(trans->bytes_reserved >= bytes); - trans->bytes_reserved -= bytes; - } - - ret = __btrfs_add_delayed_item(delayed_node, delayed_item); - if (unlikely(ret)) { - btrfs_err(trans->fs_info, - "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", - name_len, name, delayed_node->root->root_key.objectid, - delayed_node->inode_id, ret); - BUG(); + } else { + btrfs_release_dir_index_item_space(trans); } mutex_unlock(&delayed_node->mutex); release_node: - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return ret; } -static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_node *node, - u64 index) +static bool btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node, + u64 index) { struct btrfs_delayed_item *item; @@ -1520,7 +1590,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info, item = __btrfs_lookup_delayed_item(&node->ins_root.rb_root, index); if (!item) { mutex_unlock(&node->mutex); - return 1; + return false; } /* @@ -1555,23 +1625,25 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info, } mutex_unlock(&node->mutex); - return 0; + return true; } int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, u64 index) { struct btrfs_delayed_node *node; + struct btrfs_ref_tracker delayed_node_tracker; struct btrfs_delayed_item *item; int ret; - node = btrfs_get_or_create_delayed_node(dir); + node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker); if (IS_ERR(node)) return PTR_ERR(node); - ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, index); - if (!ret) + if (btrfs_delete_delayed_insertion_item(node, index)) { + ret = 0; goto end; + } item = btrfs_alloc_delayed_item(0, node, BTRFS_DELAYED_DELETION_ITEM); if (!item) { @@ -1588,7 +1660,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, */ if (ret < 0) { btrfs_err(trans->fs_info, -"metadata reservation failed for delayed dir item deltiona, should have been reserved"); +"metadata reservation failed for delayed dir item deletion, index: %llu, root: %llu, inode: %llu, error: %d", + index, btrfs_root_id(node->root), node->inode_id, ret); btrfs_release_delayed_item(item); goto end; } @@ -1597,22 +1670,23 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, ret = __btrfs_add_delayed_item(node, item); if (unlikely(ret)) { btrfs_err(trans->fs_info, - "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", - index, node->root->root_key.objectid, - node->inode_id, ret); +"failed to add delayed dir index item, index: %llu, root: %llu, inode: %llu, error: %d", + index, btrfs_root_id(node->root), node->inode_id, ret); btrfs_delayed_item_release_metadata(dir->root, item); btrfs_release_delayed_item(item); } mutex_unlock(&node->mutex); end: - btrfs_release_delayed_node(node); + btrfs_release_delayed_node(node, &delayed_node_tracker); return ret; } int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) { - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); + struct btrfs_ref_tracker delayed_node_tracker; + struct btrfs_delayed_node
*delayed_node; + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!delayed_node) return -ENOENT; @@ -1622,23 +1696,25 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) * is updated now. So we needn't lock the delayed node. */ if (!delayed_node->index_cnt) { - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return -EINVAL; } inode->index_cnt = delayed_node->index_cnt; - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return 0; } -bool btrfs_readdir_get_delayed_items(struct inode *inode, +bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list) { struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_item *item; + struct btrfs_ref_tracker delayed_node_tracker; - delayed_node = btrfs_get_delayed_node(BTRFS_I(inode)); + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!delayed_node) return false; @@ -1646,19 +1722,19 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, * We can only do one readdir with delayed items at a time because of * item->readdir_list. */ - btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); - btrfs_inode_lock(BTRFS_I(inode), 0); + btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); + btrfs_inode_lock(inode, 0); mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, ins_list); item = __btrfs_next_delayed_item(item); } item = __btrfs_first_delayed_deletion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, del_list); item = __btrfs_next_delayed_item(item); @@ -1673,12 +1749,13 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, * insert/delete delayed items in this period. So we also needn't * requeue or dequeue this delayed node. */ + btrfs_delayed_node_ref_tracker_free(delayed_node, &delayed_node_tracker); refcount_dec(&delayed_node->refs); return true; } -void btrfs_readdir_put_delayed_items(struct inode *inode, +void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode, struct list_head *ins_list, struct list_head *del_list) { @@ -1700,20 +1777,19 @@ void btrfs_readdir_put_delayed_items(struct inode *inode, * The VFS is going to do up_read(), so we need to downgrade back to a * read lock. */ - downgrade_write(&inode->i_rwsem); + downgrade_write(&inode->vfs_inode.i_rwsem); } -int btrfs_should_delete_dir_index(struct list_head *del_list, - u64 index) +bool btrfs_should_delete_dir_index(const struct list_head *del_list, u64 index) { struct btrfs_delayed_item *curr; - int ret = 0; + bool ret = false; list_for_each_entry(curr, del_list, readdir_list) { if (curr->index > index) break; if (curr->index == index) { - ret = 1; + ret = true; break; } } @@ -1721,29 +1797,26 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, } /* - * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree - * + * Read dir info stored in the delayed tree. 
*/ -int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list) +bool btrfs_readdir_delayed_dir_index(struct dir_context *ctx, + const struct list_head *ins_list) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; struct btrfs_key location; char *name; int name_len; - int over = 0; unsigned char d_type; - if (list_empty(ins_list)) - return 0; - /* * Changing the data of the delayed item is impossible. So * we needn't lock them. And we have held i_mutex of the * directory, nobody can delete any directory indexes now. */ list_for_each_entry_safe(curr, next, ins_list, readdir_list) { + bool over; + list_del(&curr->readdir_list); if (curr->index < ctx->pos) { @@ -1761,137 +1834,130 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, d_type = fs_ftype_to_dtype(btrfs_dir_flags_to_ftype(di->type)); btrfs_disk_key_to_cpu(&location, &di->location); - over = !dir_emit(ctx, name, name_len, - location.objectid, d_type); + over = !dir_emit(ctx, name, name_len, location.objectid, d_type); if (refcount_dec_and_test(&curr->refs)) kfree(curr); if (over) - return 1; + return true; ctx->pos++; } - return 0; + return false; } static void fill_stack_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item, - struct inode *inode) + struct btrfs_inode *inode) { + struct inode *vfs_inode = &inode->vfs_inode; u64 flags; - btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); - btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); - btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); - btrfs_set_stack_inode_mode(inode_item, inode->i_mode); - btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); - btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); - btrfs_set_stack_inode_generation(inode_item, - BTRFS_I(inode)->generation); + btrfs_set_stack_inode_uid(inode_item, i_uid_read(vfs_inode)); + btrfs_set_stack_inode_gid(inode_item, i_gid_read(vfs_inode)); + btrfs_set_stack_inode_size(inode_item, inode->disk_i_size); + btrfs_set_stack_inode_mode(inode_item, vfs_inode->i_mode); + btrfs_set_stack_inode_nlink(inode_item, vfs_inode->i_nlink); + btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(vfs_inode)); + btrfs_set_stack_inode_generation(inode_item, inode->generation); btrfs_set_stack_inode_sequence(inode_item, - inode_peek_iversion(inode)); + inode_peek_iversion(vfs_inode)); btrfs_set_stack_inode_transid(inode_item, trans->transid); - btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); - flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags, - BTRFS_I(inode)->ro_flags); + btrfs_set_stack_inode_rdev(inode_item, vfs_inode->i_rdev); + flags = btrfs_inode_combine_flags(inode->flags, inode->ro_flags); btrfs_set_stack_inode_flags(inode_item, flags); btrfs_set_stack_inode_block_group(inode_item, 0); btrfs_set_stack_timespec_sec(&inode_item->atime, - inode->i_atime.tv_sec); + inode_get_atime_sec(vfs_inode)); btrfs_set_stack_timespec_nsec(&inode_item->atime, - inode->i_atime.tv_nsec); + inode_get_atime_nsec(vfs_inode)); btrfs_set_stack_timespec_sec(&inode_item->mtime, - inode->i_mtime.tv_sec); + inode_get_mtime_sec(vfs_inode)); btrfs_set_stack_timespec_nsec(&inode_item->mtime, - inode->i_mtime.tv_nsec); + inode_get_mtime_nsec(vfs_inode)); btrfs_set_stack_timespec_sec(&inode_item->ctime, - inode->i_ctime.tv_sec); + inode_get_ctime_sec(vfs_inode)); btrfs_set_stack_timespec_nsec(&inode_item->ctime, - inode->i_ctime.tv_nsec); + inode_get_ctime_nsec(vfs_inode)); - 
btrfs_set_stack_timespec_sec(&inode_item->otime, - BTRFS_I(inode)->i_otime.tv_sec); - btrfs_set_stack_timespec_nsec(&inode_item->otime, - BTRFS_I(inode)->i_otime.tv_nsec); + btrfs_set_stack_timespec_sec(&inode_item->otime, inode->i_otime_sec); + btrfs_set_stack_timespec_nsec(&inode_item->otime, inode->i_otime_nsec); } -int btrfs_fill_inode(struct inode *inode, u32 *rdev) +int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev) { - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct btrfs_delayed_node *delayed_node; + struct btrfs_ref_tracker delayed_node_tracker; struct btrfs_inode_item *inode_item; + struct inode *vfs_inode = &inode->vfs_inode; - delayed_node = btrfs_get_delayed_node(BTRFS_I(inode)); + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!delayed_node) return -ENOENT; mutex_lock(&delayed_node->mutex); if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return -ENOENT; } inode_item = &delayed_node->inode_item; - i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); - i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); - btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item)); - btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0, - round_up(i_size_read(inode), fs_info->sectorsize)); - inode->i_mode = btrfs_stack_inode_mode(inode_item); - set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); - inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); - BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); - BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); - - inode_set_iversion_queried(inode, - btrfs_stack_inode_sequence(inode_item)); - inode->i_rdev = 0; + i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item)); + i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item)); + btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); + vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item); + set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item)); + inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item)); + inode->generation = btrfs_stack_inode_generation(inode_item); + inode->last_trans = btrfs_stack_inode_transid(inode_item); + + inode_set_iversion_queried(vfs_inode, btrfs_stack_inode_sequence(inode_item)); + vfs_inode->i_rdev = 0; *rdev = btrfs_stack_inode_rdev(inode_item); btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item), - &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags); + &inode->flags, &inode->ro_flags); - inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime); + inode_set_atime(vfs_inode, btrfs_stack_timespec_sec(&inode_item->atime), + btrfs_stack_timespec_nsec(&inode_item->atime)); - inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime); + inode_set_mtime(vfs_inode, btrfs_stack_timespec_sec(&inode_item->mtime), + btrfs_stack_timespec_nsec(&inode_item->mtime)); - inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime); + inode_set_ctime(vfs_inode, btrfs_stack_timespec_sec(&inode_item->ctime), + btrfs_stack_timespec_nsec(&inode_item->ctime)); - BTRFS_I(inode)->i_otime.tv_sec = - 
btrfs_stack_timespec_sec(&inode_item->otime); - BTRFS_I(inode)->i_otime.tv_nsec = - btrfs_stack_timespec_nsec(&inode_item->otime); + inode->i_otime_sec = btrfs_stack_timespec_sec(&inode_item->otime); + inode->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime); - inode->i_generation = BTRFS_I(inode)->generation; - BTRFS_I(inode)->index_cnt = (u64)-1; + vfs_inode->i_generation = inode->generation; + if (S_ISDIR(vfs_inode->i_mode)) + inode->index_cnt = (u64)-1; mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return 0; } int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode) { + struct btrfs_root *root = inode->root; struct btrfs_delayed_node *delayed_node; + struct btrfs_ref_tracker delayed_node_tracker; int ret = 0; - delayed_node = btrfs_get_or_create_delayed_node(inode); + delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); mutex_lock(&delayed_node->mutex); if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { - fill_stack_inode_item(trans, &delayed_node->inode_item, - &inode->vfs_inode); + fill_stack_inode_item(trans, &delayed_node->inode_item, inode); goto release_node; } @@ -1899,13 +1965,13 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, if (ret) goto release_node; - fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode); + fill_stack_inode_item(trans, &delayed_node->inode_item, inode); set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags); delayed_node->count++; atomic_inc(&root->fs_info->delayed_root->items); release_node: mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return ret; } @@ -1913,6 +1979,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_delayed_node *delayed_node; + struct btrfs_ref_tracker delayed_node_tracker; /* * we don't do delayed inode updates during log recovery because it @@ -1922,7 +1989,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) return -EAGAIN; - delayed_node = btrfs_get_or_create_delayed_node(inode); + delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); @@ -1941,15 +2008,12 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) * It is very rare. 
*/ mutex_lock(&delayed_node->mutex); - if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) - goto release_node; - - set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags); - delayed_node->count++; - atomic_inc(&fs_info->delayed_root->items); -release_node: + if (!test_and_set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) { + delayed_node->count++; + atomic_inc(&fs_info->delayed_root->items); + } mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); return 0; } @@ -1993,47 +2057,56 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node) void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) { struct btrfs_delayed_node *delayed_node; + struct btrfs_ref_tracker delayed_node_tracker; - delayed_node = btrfs_get_delayed_node(inode); + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!delayed_node) return; __btrfs_kill_delayed_node(delayed_node); - btrfs_release_delayed_node(delayed_node); + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); } void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) { - u64 inode_id = 0; + unsigned long index = 0; struct btrfs_delayed_node *delayed_nodes[8]; - int i, n; + struct btrfs_ref_tracker delayed_node_trackers[8]; while (1) { - spin_lock(&root->inode_lock); - n = radix_tree_gang_lookup(&root->delayed_nodes_tree, - (void **)delayed_nodes, inode_id, - ARRAY_SIZE(delayed_nodes)); - if (!n) { - spin_unlock(&root->inode_lock); - break; + struct btrfs_delayed_node *node; + int count; + + xa_lock(&root->delayed_nodes); + if (xa_empty(&root->delayed_nodes)) { + xa_unlock(&root->delayed_nodes); + return; } - inode_id = delayed_nodes[n - 1]->inode_id + 1; - for (i = 0; i < n; i++) { + count = 0; + xa_for_each_start(&root->delayed_nodes, index, node, index) { /* * Don't increase refs in case the node is dead and * about to be removed from the tree in the loop below */ - if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) - delayed_nodes[i] = NULL; + if (refcount_inc_not_zero(&node->refs)) { + btrfs_delayed_node_ref_tracker_alloc(node, + &delayed_node_trackers[count], + GFP_ATOMIC); + delayed_nodes[count] = node; + count++; + } + if (count >= ARRAY_SIZE(delayed_nodes)) + break; } - spin_unlock(&root->inode_lock); + xa_unlock(&root->delayed_nodes); + index++; - for (i = 0; i < n; i++) { - if (!delayed_nodes[i]) - continue; + for (int i = 0; i < count; i++) { __btrfs_kill_delayed_node(delayed_nodes[i]); - btrfs_release_delayed_node(delayed_nodes[i]); + btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); + btrfs_release_delayed_node(delayed_nodes[i], + &delayed_node_trackers[i]); } } } @@ -2041,14 +2114,17 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info) { struct btrfs_delayed_node *curr_node, *prev_node; + struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker; - curr_node = btrfs_first_delayed_node(fs_info->delayed_root); + curr_node = btrfs_first_delayed_node(fs_info->delayed_root, + &curr_delayed_node_tracker); while (curr_node) { __btrfs_kill_delayed_node(curr_node); prev_node = curr_node; - curr_node = btrfs_next_delayed_node(curr_node); - btrfs_release_delayed_node(prev_node); + prev_delayed_node_tracker = curr_delayed_node_tracker; + curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker); + btrfs_release_delayed_node(prev_node, 
&prev_delayed_node_tracker); } } @@ -2058,8 +2134,9 @@ void btrfs_log_get_delayed_items(struct btrfs_inode *inode, { struct btrfs_delayed_node *node; struct btrfs_delayed_item *item; + struct btrfs_ref_tracker delayed_node_tracker; - node = btrfs_get_delayed_node(inode); + node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!node) return; @@ -2117,6 +2194,7 @@ void btrfs_log_get_delayed_items(struct btrfs_inode *inode, * delete delayed items. */ ASSERT(refcount_read(&node->refs) > 1); + btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker); refcount_dec(&node->refs); } @@ -2127,8 +2205,9 @@ void btrfs_log_put_delayed_items(struct btrfs_inode *inode, struct btrfs_delayed_node *node; struct btrfs_delayed_item *item; struct btrfs_delayed_item *next; + struct btrfs_ref_tracker delayed_node_tracker; - node = btrfs_get_delayed_node(inode); + node = btrfs_get_delayed_node(inode, &delayed_node_tracker); if (!node) return; @@ -2160,5 +2239,6 @@ void btrfs_log_put_delayed_items(struct btrfs_inode *inode, * delete delayed items. */ ASSERT(refcount_read(&node->refs) > 1); + btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker); refcount_dec(&node->refs); } |
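
The headline change in this diff is moving the per-root delayed-node index from a spinlock-protected radix tree to an xarray. Below is a condensed sketch of the lookup-or-create pattern that btrfs_get_or_create_delayed_node() now follows; my_node, node_lookup() and node_get_or_create() are illustrative names, not btrfs symbols, and the ref-tracker and inode-cache handling of the real code is stripped out.

#include <linux/err.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/xarray.h>

struct my_node {
	refcount_t refs;
};

static struct my_node *node_lookup(struct xarray *nodes, unsigned long ino)
{
	struct my_node *node;

	xa_lock(nodes);
	node = xa_load(nodes, ino);
	/*
	 * A node whose refcount already dropped to zero is being removed
	 * from the xarray; treat it as absent instead of resurrecting it.
	 */
	if (node && !refcount_inc_not_zero(&node->refs))
		node = NULL;
	xa_unlock(nodes);
	return node;
}

static struct my_node *node_get_or_create(struct xarray *nodes, unsigned long ino)
{
	struct my_node *node;
	void *ptr;

again:
	node = node_lookup(nodes, ino);
	if (node)
		return node;

	node = kzalloc(sizeof(*node), GFP_NOFS);
	if (!node)
		return ERR_PTR(-ENOMEM);

	/* Reserve the slot first so the store below cannot fail with -ENOMEM. */
	if (xa_reserve(nodes, ino, GFP_NOFS)) {
		kfree(node);
		return ERR_PTR(-ENOMEM);
	}

	xa_lock(nodes);
	ptr = xa_load(nodes, ino);	/* reserved slots still read back as NULL */
	if (ptr) {
		/* Somebody else inserted it meanwhile; retry the lookup. */
		xa_unlock(nodes);
		kfree(node);
		goto again;
	}
	refcount_set(&node->refs, 1);
	ptr = __xa_store(nodes, ino, node, GFP_ATOMIC);
	WARN_ON(xa_err(ptr));		/* cannot fail: slot was reserved above */
	xa_unlock(nodes);

	return node;
}

The reserve-then-store split is the point of the pattern: the allocation that can sleep (GFP_NOFS) happens before taking xa_lock, so the store under the lock only needs GFP_ATOMIC and is guaranteed a preallocated slot.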
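The patch also replaces the open-coded red-black tree walks with rb_find() and rb_find_add_cached() from <linux/rbtree.h>, driven by the two small comparators delayed_item_index_cmp() and btrfs_delayed_item_cmp() shown above (one keyed, one node-vs-node). The same conversion, reduced to a generic index-keyed item (indexed_item and the helper names are illustrative):

#include <linux/rbtree.h>
#include <linux/types.h>

struct indexed_item {
	struct rb_node rb_node;
	u64 index;
};

/* Comparator for lookups: key vs. existing node; >0 means "key is to the right". */
static int item_index_cmp(const void *key, const struct rb_node *node)
{
	const u64 *index = key;
	const struct indexed_item *item = rb_entry(node, struct indexed_item, rb_node);

	if (item->index < *index)
		return 1;
	if (item->index > *index)
		return -1;
	return 0;
}

/* Comparator for inserts: new node vs. existing node, reusing the keyed version. */
static int item_cmp(const struct rb_node *new, const struct rb_node *exist)
{
	const struct indexed_item *new_item = rb_entry(new, struct indexed_item, rb_node);

	return item_index_cmp(&new_item->index, exist);
}

static struct indexed_item *item_lookup(struct rb_root *root, u64 index)
{
	return rb_entry_safe(rb_find(&index, root, item_index_cmp),
			     struct indexed_item, rb_node);
}

static int item_insert(struct rb_root_cached *root, struct indexed_item *ins)
{
	/* rb_find_add_cached() links and rebalances, or returns the colliding node. */
	if (rb_find_add_cached(&ins->rb_node, root, item_cmp))
		return -EEXIST;
	return 0;
}

The helpers subsume the manual leftmost bookkeeping of the old code: rb_find_add_cached() tracks the cached leftmost node itself, which is why the "bool leftmost" logic disappears from __btrfs_add_delayed_item().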
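Two smaller conversions follow current kernel idiom: KMEM_CACHE() derives the slab cache's name, object size and alignment from the struct itself, and struct_size() replaces the hand-written sizeof(*item) + data_len with an overflow-checked expression for the flexible-array allocation. A minimal sketch (demo_item is a made-up type, not the btrfs one):

#include <linux/init.h>
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_item {
	u16 data_len;
	char data[];			/* flexible array member */
};

static struct kmem_cache *demo_cache;

static int __init demo_init(void)
{
	/* Expands to kmem_cache_create("demo_item", sizeof, __alignof__, 0, NULL). */
	demo_cache = KMEM_CACHE(demo_item, 0);
	return demo_cache ? 0 : -ENOMEM;
}

static struct demo_item *demo_alloc(u16 data_len)
{
	struct demo_item *item;

	/* struct_size() = sizeof(*item) + data_len, saturating on overflow. */
	item = kmalloc(struct_size(item, data, data_len), GFP_NOFS);
	if (item)
		item->data_len = data_len;
	return item;
}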
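The btrfs_delayed_node_ref_tracker_*() calls threaded through every refcount operation are wrappers defined outside this file, so their exact shape is not visible in this diff; they appear to sit on top of the kernel's generic ref_tracker facility. Assuming the lib/ref_tracker interface (a per-object ref_tracker_dir plus one struct ref_tracker handle per reference), the underlying pattern looks roughly like this; tracked_obj and its helpers are placeholders:

#include <linux/ref_tracker.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct tracked_obj {
	refcount_t refs;
	struct ref_tracker_dir dir;	/* one tracking directory per object */
};

static void tracked_obj_init(struct tracked_obj *obj)
{
	refcount_set(&obj->refs, 1);
	/* Quarantine up to 16 freed trackers to help catch double-puts. */
	ref_tracker_dir_init(&obj->dir, 16, "tracked_obj");
}

static void tracked_obj_get(struct tracked_obj *obj, struct ref_tracker **trackerp)
{
	refcount_inc(&obj->refs);
	/* Records a stack trace for this reference; pairs with ref_tracker_free(). */
	ref_tracker_alloc(&obj->dir, trackerp, GFP_ATOMIC);
}

static void tracked_obj_put(struct tracked_obj *obj, struct ref_tracker **trackerp)
{
	ref_tracker_free(&obj->dir, trackerp);
	if (refcount_dec_and_test(&obj->refs)) {
		/* Prints any references that were taken but never released. */
		ref_tracker_dir_print(&obj->dir, 16);
		ref_tracker_dir_exit(&obj->dir);
		kfree(obj);
	}
}

This explains why nearly every function in the patch grows a struct btrfs_ref_tracker * parameter: each get/put site owns a tracker handle, so a leaked delayed-node reference can be attributed to the exact call path that took it.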
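Finally, btrfs_kill_all_delayed_nodes() keeps its old gang-lookup structure but drives it with xa_for_each_start(): collect up to eight referenced nodes under xa_lock, drop the lock, process the batch, then resume from the next index. A stripped-down version of that loop, reusing my_node from the first sketch above (node_put() is an assumed helper that kills the node and drops the reference taken here):

#include <linux/kernel.h>
#include <linux/refcount.h>
#include <linux/xarray.h>

void node_put(struct my_node *node);	/* assumed: kill + drop the ref taken below */

static void kill_all_nodes(struct xarray *nodes)
{
	unsigned long index = 0;

	while (1) {
		struct my_node *batch[8];
		struct my_node *node;
		int count = 0;

		xa_lock(nodes);
		if (xa_empty(nodes)) {
			xa_unlock(nodes);
			return;
		}
		xa_for_each_start(nodes, index, node, index) {
			/*
			 * Skip nodes whose refcount already hit zero; their
			 * release path will erase them from the xarray.
			 */
			if (refcount_inc_not_zero(&node->refs))
				batch[count++] = node;
			if (count >= ARRAY_SIZE(batch))
				break;
		}
		xa_unlock(nodes);
		index++;	/* resume scanning after the last visited slot */

		for (int i = 0; i < count; i++)
			node_put(batch[i]);
	}
}

Batching keeps the xa_lock hold times short while still making forward progress, the same trade-off the old radix_tree_gang_lookup() loop made.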