diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 324 |
1 files changed, 219 insertions, 105 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 331fc7429952..955d1677e865 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -13,13 +13,11 @@ #include "tree-log.h" #include "disk-io.h" #include "locking.h" -#include "print-tree.h" #include "backref.h" #include "compression.h" #include "qgroup.h" #include "block-group.h" #include "space-info.h" -#include "zoned.h" #include "inode-item.h" #include "fs.h" #include "accessors.h" @@ -140,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid); * and once to do all the other items. */ +static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) +{ + unsigned int nofs_flag; + struct inode *inode; + + /* + * We're holding a transaction handle whether we are logging or + * replaying a log tree, so we must make sure NOFS semantics apply + * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL + * to allocate an inode, which can recurse back into the filesystem and + * attempt a transaction commit, resulting in a deadlock. + */ + nofs_flag = memalloc_nofs_save(); + inode = btrfs_iget(objectid, root); + memalloc_nofs_restore(nofs_flag); + + return inode; +} + /* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people @@ -393,7 +410,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans, * the leaf before writing into the log tree. See the comments at * copy_items() for more details. */ - ASSERT(root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); + ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID); item_size = btrfs_item_size(eb, slot); src_ptr = btrfs_item_ptr_offset(eb, slot); @@ -573,7 +590,6 @@ insert: } } no_copy: - btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); return 0; } @@ -602,7 +618,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, { struct inode *inode; - inode = btrfs_iget(root->fs_info->sb, objectid, root); + inode = btrfs_iget_logging(objectid, root); if (IS_ERR(inode)) inode = NULL; return inode; @@ -750,7 +766,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, goto out; if (ins.objectid > 0) { - struct btrfs_ref ref = { 0 }; u64 csum_start; u64 csum_end; LIST_HEAD(ordered_sums); @@ -764,13 +779,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, if (ret < 0) { goto out; } else if (ret == 0) { - btrfs_init_generic_ref(&ref, - BTRFS_ADD_DELAYED_REF, - ins.objectid, ins.offset, 0, - root->root_key.objectid); - btrfs_init_data_ref(&ref, - root->root_key.objectid, - key->objectid, offset, 0, false); + struct btrfs_ref ref = { + .action = BTRFS_ADD_DELAYED_REF, + .bytenr = ins.objectid, + .num_bytes = ins.offset, + .owning_root = btrfs_root_id(root), + .ref_root = btrfs_root_id(root), + }; + btrfs_init_data_ref(&ref, key->objectid, offset, + 0, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) goto out; @@ -780,7 +797,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * allocation tree */ ret = btrfs_alloc_logged_file_extent(trans, - root->root_key.objectid, + btrfs_root_id(root), key->objectid, offset, &ins); if (ret) goto out; @@ -799,9 +816,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_lookup_csums_list(root->log_root, csum_start, csum_end - 1, - &ordered_sums, 0, false); - if (ret) + &ordered_sums, false); + if (ret < 0) goto out; + ret = 0; /* * Now delete all existing cums in the csum root that * cover our range. We do this because we can have an @@ -1355,7 +1373,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct inode *inode = NULL; unsigned long ref_ptr; unsigned long ref_end; - struct fscrypt_str name; + struct fscrypt_str name = { 0 }; int ret; int log_ref_ver = 0; u64 parent_objectid; @@ -1625,7 +1643,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, if (ret) goto out; } - BTRFS_I(inode)->index_cnt = (u64)-1; + if (S_ISDIR(inode->i_mode)) + BTRFS_I(inode)->index_cnt = (u64)-1; if (inode->i_nlink == 0) { if (S_ISDIR(inode->i_mode)) { @@ -1825,7 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di, struct btrfs_key *key) { - struct fscrypt_str name; + struct fscrypt_str name = { 0 }; struct btrfs_dir_item *dir_dst_di; struct btrfs_dir_item *index_dst_di; bool dir_dst_matches = false; @@ -2105,7 +2124,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, struct extent_buffer *eb; int slot; struct btrfs_dir_item *di; - struct fscrypt_str name; + struct fscrypt_str name = { 0 }; struct inode *inode = NULL; struct btrfs_key location; @@ -2820,6 +2839,52 @@ static void wait_for_writer(struct btrfs_root *root) finish_wait(&root->log_writer_wait, &wait); } +void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode) +{ + ctx->log_ret = 0; + ctx->log_transid = 0; + ctx->log_new_dentries = false; + ctx->logging_new_name = false; + ctx->logging_new_delayed_dentries = false; + ctx->logged_before = false; + ctx->inode = inode; + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->ordered_extents); + INIT_LIST_HEAD(&ctx->conflict_inodes); + ctx->num_conflict_inodes = 0; + ctx->logging_conflict_inodes = false; + ctx->scratch_eb = NULL; +} + +void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx) +{ + struct btrfs_inode *inode = ctx->inode; + + if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) && + !test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags)) + return; + + /* + * Don't care about allocation failure. This is just for optimization, + * if we fail to allocate here, we will try again later if needed. + */ + ctx->scratch_eb = alloc_dummy_extent_buffer(inode->root->fs_info, 0); +} + +void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx) +{ + struct btrfs_ordered_extent *ordered; + struct btrfs_ordered_extent *tmp; + + btrfs_assert_inode_locked(ctx->inode); + + list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) { + list_del_init(&ordered->log_list); + btrfs_put_ordered_extent(ordered); + } +} + + static inline void btrfs_remove_log_ctx(struct btrfs_root *root, struct btrfs_log_ctx *ctx) { @@ -3001,7 +3066,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (ret != -ENOSPC) btrfs_err(fs_info, "failed to update log for root %llu ret %d", - root->root_key.objectid, ret); + btrfs_root_id(root), ret); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); goto out; @@ -3522,7 +3587,6 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, last_offset = max(last_offset, curr_end); } btrfs_set_dir_log_end(path->nodes[0], item, last_offset); - btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); return 0; } @@ -3619,6 +3683,30 @@ out: return ret; } +static int clone_leaf(struct btrfs_path *path, struct btrfs_log_ctx *ctx) +{ + const int slot = path->slots[0]; + + if (ctx->scratch_eb) { + copy_extent_buffer_full(ctx->scratch_eb, path->nodes[0]); + } else { + ctx->scratch_eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!ctx->scratch_eb) + return -ENOMEM; + } + + btrfs_release_path(path); + path->nodes[0] = ctx->scratch_eb; + path->slots[0] = slot; + /* + * Add extra ref to scratch eb so that it is not freed when callers + * release the path, so we can reuse it later if needed. + */ + atomic_inc(&ctx->scratch_eb->refs); + + return 0; +} + static int process_dir_items_leaf(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, @@ -3633,23 +3721,20 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, bool last_found = false; int batch_start = 0; int batch_size = 0; - int i; + int ret; /* * We need to clone the leaf, release the read lock on it, and use the * clone before modifying the log tree. See the comment at copy_items() * about why we need to do this. */ - src = btrfs_clone_extent_buffer(path->nodes[0]); - if (!src) - return -ENOMEM; + ret = clone_leaf(path, ctx); + if (ret < 0) + return ret; - i = path->slots[0]; - btrfs_release_path(path); - path->nodes[0] = src; - path->slots[0] = i; + src = path->nodes[0]; - for (; i < nritems; i++) { + for (int i = path->slots[0]; i < nritems; i++) { struct btrfs_dir_item *di; struct btrfs_key key; int ret; @@ -4167,8 +4252,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, bool inode_item_dropped) { struct btrfs_inode_item *inode_item; + struct btrfs_key key; int ret; + btrfs_get_inode_key(inode, &key); /* * If we are doing a fast fsync and the inode was logged before in the * current transaction, then we know the inode was previously logged and @@ -4180,7 +4267,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans, * already exists can also result in unnecessarily splitting a leaf. */ if (!inode_item_dropped && inode->logged_trans == trans->transid) { - ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1); + ret = btrfs_search_slot(trans, log, &key, path, 0, 1); ASSERT(ret <= 0); if (ret > 0) ret = -ENOENT; @@ -4194,7 +4281,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans, * the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime * flags and set ->logged_trans to 0. */ - ret = btrfs_insert_empty_item(trans, log, path, &inode->location, + ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*inode_item)); ASSERT(ret != -EEXIST); } @@ -4259,17 +4346,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_path *dst_path, struct btrfs_path *src_path, int start_slot, int nr, int inode_only, - u64 logged_isize) + u64 logged_isize, struct btrfs_log_ctx *ctx) { struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; struct extent_buffer *src; - int ret = 0; + int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; struct btrfs_item_batch batch; char *ins_data; - int i; int dst_index; const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); @@ -4302,14 +4388,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * while the other is holding the delayed node's mutex and wants to * write lock the same subvolume leaf for flushing delayed items. */ - src = btrfs_clone_extent_buffer(src_path->nodes[0]); - if (!src) - return -ENOMEM; + ret = clone_leaf(src_path, ctx); + if (ret < 0) + return ret; - i = src_path->slots[0]; - btrfs_release_path(src_path); - src_path->nodes[0] = src; - src_path->slots[0] = i; + src = src_path->nodes[0]; ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); @@ -4324,7 +4407,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, batch.nr = 0; dst_index = 0; - for (i = 0; i < nr; i++) { + for (int i = 0; i < nr; i++) { const int src_slot = start_slot + i; struct btrfs_root *csum_root; struct btrfs_ordered_sum *sums; @@ -4399,9 +4482,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, disk_bytenr += extent_offset; ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, disk_bytenr + extent_num_bytes - 1, - &ordered_sums, 0, false); - if (ret) + &ordered_sums, false); + if (ret < 0) goto out; + ret = 0; list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) { if (!ret) @@ -4431,7 +4515,7 @@ add_to_batch: goto out; dst_index = 0; - for (i = 0; i < nr; i++) { + for (int i = 0; i < nr; i++) { const int src_slot = start_slot + i; const int dst_slot = dst_path->slots[0] + dst_index; struct btrfs_key key; @@ -4480,7 +4564,6 @@ copy_item: dst_index++; } - btrfs_mark_buffer_dirty(trans, dst_path->nodes[0]); btrfs_release_path(dst_path); out: kfree(ins_data); @@ -4511,16 +4594,17 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, { struct btrfs_ordered_extent *ordered; struct btrfs_root *csum_root; + u64 block_start; u64 csum_offset; u64 csum_len; - u64 mod_start = em->mod_start; - u64 mod_len = em->mod_len; + u64 mod_start = em->start; + u64 mod_len = em->len; LIST_HEAD(ordered_sums); int ret = 0; if (inode->flags & BTRFS_INODE_NODATASUM || (em->flags & EXTENT_FLAG_PREALLOC) || - em->block_start == EXTENT_MAP_HOLE) + em->disk_bytenr == EXTENT_MAP_HOLE) return 0; list_for_each_entry(ordered, &ctx->ordered_extents, log_list) { @@ -4584,19 +4668,21 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, /* If we're compressed we have to save the entire range of csums. */ if (extent_map_is_compressed(em)) { csum_offset = 0; - csum_len = max(em->block_len, em->orig_block_len); + csum_len = em->disk_num_bytes; } else { csum_offset = mod_start - em->start; csum_len = mod_len; } /* block start is already adjusted for the file extent offset. */ - csum_root = btrfs_csum_root(trans->fs_info, em->block_start); - ret = btrfs_lookup_csums_list(csum_root, em->block_start + csum_offset, - em->block_start + csum_offset + - csum_len - 1, &ordered_sums, 0, false); - if (ret) + block_start = extent_map_block_start(em); + csum_root = btrfs_csum_root(trans->fs_info, block_start); + ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset, + block_start + csum_offset + csum_len - 1, + &ordered_sums, false); + if (ret < 0) return ret; + ret = 0; while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, @@ -4623,7 +4709,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; enum btrfs_compression_type compress_type; - u64 extent_offset = em->start - em->orig_start; + u64 extent_offset = em->offset; + u64 block_start = extent_map_block_start(em); u64 block_len; int ret; @@ -4633,14 +4720,13 @@ static int log_one_extent(struct btrfs_trans_handle *trans, else btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG); - block_len = max(em->block_len, em->orig_block_len); + block_len = em->disk_num_bytes; compress_type = extent_map_compression(em); if (compress_type != BTRFS_COMPRESS_NONE) { - btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start); + btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start); btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len); - } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { - btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start - - extent_offset); + } else if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) { + btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start - extent_offset); btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len); } @@ -4687,7 +4773,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, &fi, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(fi)); - btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); @@ -4704,7 +4789,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans, */ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, - struct btrfs_path *path) + struct btrfs_path *path, + struct btrfs_log_ctx *ctx) { struct btrfs_root *root = inode->root; struct btrfs_key key; @@ -4770,7 +4856,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - start_slot, ins_nr, 1, 0); + start_slot, ins_nr, 1, 0, ctx); if (ret < 0) goto out; ins_nr = 0; @@ -4794,18 +4880,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (!dropped_extents) { - /* - * Avoid logging extent items logged in past fsync calls - * and leading to duplicate keys in the log tree. - */ + /* + * Avoid overlapping items in the log tree. The first time we + * get here, get rid of everything from a past fsync. After + * that, if the current extent starts before the end of the last + * extent we copied, truncate the last one. This can happen if + * an ordered extent completion modifies the subvolume tree + * while btrfs_next_leaf() has the tree unlocked. + */ + if (!dropped_extents || key.offset < truncate_offset) { ret = truncate_inode_items(trans, root->log_root, inode, - truncate_offset, + min(key.offset, truncate_offset), BTRFS_EXTENT_DATA_KEY); if (ret) goto out; dropped_extents = true; } + truncate_offset = btrfs_file_extent_end(path); if (ins_nr == 0) start_slot = slot; ins_nr++; @@ -4820,7 +4911,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } if (ins_nr > 0) ret = copy_items(trans, inode, dst_path, path, - start_slot, ins_nr, 1, 0); + start_slot, ins_nr, 1, 0, ctx); out: btrfs_release_path(path); btrfs_free_path(dst_path); @@ -4883,7 +4974,7 @@ process: * private list. */ if (ret) { - clear_em_logging(tree, em); + clear_em_logging(inode, em); free_extent_map(em); continue; } @@ -4892,14 +4983,14 @@ process: ret = log_one_extent(trans, inode, em, path, ctx); write_lock(&tree->lock); - clear_em_logging(tree, em); + clear_em_logging(inode, em); free_extent_map(em); } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); if (!ret) - ret = btrfs_log_prealloc_extents(trans, inode, path); + ret = btrfs_log_prealloc_extents(trans, inode, path, ctx); if (ret) return ret; @@ -4980,7 +5071,8 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, - struct btrfs_path *dst_path) + struct btrfs_path *dst_path, + struct btrfs_log_ctx *ctx) { struct btrfs_root *root = inode->root; int ret; @@ -5009,7 +5101,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (slot >= nritems) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - start_slot, ins_nr, 1, 0); + start_slot, ins_nr, 1, 0, ctx); if (ret < 0) return ret; ins_nr = 0; @@ -5035,7 +5127,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, } if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - start_slot, ins_nr, 1, 0); + start_slot, ins_nr, 1, 0, ctx); if (ret < 0) return ret; } @@ -5366,7 +5458,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { struct btrfs_root *root = start_inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; LIST_HEAD(dir_list); struct btrfs_dir_list *dir_elem; @@ -5427,7 +5518,7 @@ again: continue; btrfs_release_path(path); - di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root); + di_inode = btrfs_iget_logging(di_key.objectid, root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); goto out; @@ -5487,7 +5578,7 @@ again: btrfs_add_delayed_iput(curr_inode); curr_inode = NULL; - vfs_inode = btrfs_iget(fs_info->sb, ino, root); + vfs_inode = btrfs_iget_logging(ino, root); if (IS_ERR(vfs_inode)) { ret = PTR_ERR(vfs_inode); break; @@ -5582,7 +5673,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) return BTRFS_LOG_FORCE_COMMIT; - inode = btrfs_iget(root->fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was deleted in * the current transaction then we either: @@ -5683,7 +5774,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; /* @@ -5714,7 +5804,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, list_del(&curr->list); kfree(curr); - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was * deleted in the current transaction, we need to log its parent @@ -5725,7 +5815,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, if (ret != -ENOENT) break; - inode = btrfs_iget(fs_info->sb, parent, root); + inode = btrfs_iget_logging(parent, root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); break; @@ -5838,7 +5928,7 @@ again: if (ret < 0) { return ret; } else if (ret > 0 && - other_ino != btrfs_ino(BTRFS_I(ctx->inode))) { + other_ino != btrfs_ino(ctx->inode)) { if (ins_nr > 0) { ins_nr++; } else { @@ -5847,7 +5937,7 @@ again: } ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, - inode_only, logged_isize); + inode_only, logged_isize, ctx); if (ret < 0) return ret; ins_nr = 0; @@ -5866,7 +5956,7 @@ again: goto next_slot; ret = copy_items(trans, inode, dst_path, path, ins_start_slot, - ins_nr, inode_only, logged_isize); + ins_nr, inode_only, logged_isize, ctx); if (ret < 0) return ret; ins_nr = 0; @@ -5883,7 +5973,7 @@ again: } ret = copy_items(trans, inode, dst_path, path, ins_start_slot, - ins_nr, inode_only, logged_isize); + ins_nr, inode_only, logged_isize, ctx); if (ret < 0) return ret; ins_nr = 1; @@ -5898,7 +5988,7 @@ next_slot: if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, - logged_isize); + logged_isize, ctx); if (ret < 0) return ret; ins_nr = 0; @@ -5923,7 +6013,7 @@ next_key: } if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, ins_start_slot, - ins_nr, inode_only, logged_isize); + ins_nr, inode_only, logged_isize, ctx); if (ret) return ret; } @@ -5934,7 +6024,7 @@ next_key: * lock the same leaf with btrfs_log_prealloc_extents() below. */ btrfs_release_path(path); - ret = btrfs_log_prealloc_extents(trans, inode, dst_path); + ret = btrfs_log_prealloc_extents(trans, inode, dst_path, ctx); } return ret; @@ -6110,7 +6200,6 @@ static int log_delayed_deletions_full(struct btrfs_trans_handle *trans, static int batch_delete_dir_index_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, - struct btrfs_log_ctx *ctx, const struct list_head *delayed_del_list, const struct btrfs_delayed_item *first, const struct btrfs_delayed_item **last_ret) @@ -6171,7 +6260,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans, if (ret < 0) { return ret; } else if (ret == 0) { - ret = batch_delete_dir_index_items(trans, inode, path, ctx, + ret = batch_delete_dir_index_items(trans, inode, path, delayed_del_list, curr, &last); if (ret) @@ -6247,7 +6336,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { const bool orig_log_new_dentries = ctx->log_new_dentries; - struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_item *item; int ret = 0; @@ -6273,7 +6361,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, if (key.type == BTRFS_ROOT_ITEM_KEY) continue; - di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root); + di_inode = btrfs_iget_logging(key.objectid, inode->root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); break; @@ -6526,7 +6614,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_release_path(path); btrfs_release_path(dst_path); - ret = btrfs_log_all_xattrs(trans, inode, path, dst_path); + ret = btrfs_log_all_xattrs(trans, inode, path, dst_path, ctx); if (ret) goto out_unlock; xattrs_logged = true; @@ -6553,7 +6641,7 @@ log_extents: * BTRFS_INODE_COPY_EVERYTHING set. */ if (!xattrs_logged && inode->logged_trans < trans->transid) { - ret = btrfs_log_all_xattrs(trans, inode, path, dst_path); + ret = btrfs_log_all_xattrs(trans, inode, path, dst_path, ctx); if (ret) goto out_unlock; btrfs_release_path(path); @@ -6657,7 +6745,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_path *path; struct btrfs_key key; @@ -6722,8 +6809,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, cur_offset = item_size; } - dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid, - root); + dir_inode = btrfs_iget_logging(inode_key.objectid, root); /* * If the parent inode was deleted, return an error to * fallback to a transaction commit. This is to prevent @@ -6785,7 +6871,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); while (true) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; int slot; struct btrfs_key search_key; @@ -6800,7 +6885,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, search_key.objectid = found_key.offset; search_key.type = BTRFS_INODE_ITEM_KEY; search_key.offset = 0; - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -6989,6 +7074,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, } /* + * If we're logging an inode from a subvolume created in the current + * transaction we must force a commit since the root is not persisted. + */ + if (btrfs_root_generation(&root->root_item) == trans->transid) { + ret = BTRFS_LOG_FORCE_COMMIT; + goto end_no_trans; + } + + /* * Skip already logged inodes or inodes corresponding to tmpfiles * (since logging them is pointless, a link count of 0 means they * will never be accessible). @@ -7369,6 +7463,24 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, } /* + * Call this when creating a subvolume in a directory. + * Because we don't commit a transaction when creating a subvolume, we can't + * allow the directory pointing to the subvolume to be logged with an entry that + * points to an unpersisted root if we are still in the transaction used to + * create the subvolume, so make any attempt to log the directory to result in a + * full log sync. + * Also we don't need to worry with renames, since btrfs_rename() marks the log + * for full commit when renaming a subvolume. + */ +void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, + struct btrfs_inode *dir) +{ + mutex_lock(&dir->log_mutex); + dir->last_unlink_trans = trans->transid; + mutex_unlock(&dir->log_mutex); +} + +/* * Update the log after adding a new name for an inode. * * @trans: Transaction handle. @@ -7500,8 +7612,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, goto out; } - btrfs_init_log_ctx(&ctx, &inode->vfs_inode); + btrfs_init_log_ctx(&ctx, inode); ctx.logging_new_name = true; + btrfs_init_log_ctx_scratch_eb(&ctx); /* * We don't care about the return value. If we fail to log the new name * then we know the next attempt to sync the log will fallback to a full @@ -7510,6 +7623,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * inconsistent state after a rename operation. */ btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx); + free_extent_buffer(ctx.scratch_eb); ASSERT(list_empty(&ctx.conflict_inodes)); out: /* |