Diffstat (limited to 'fs/btrfs/backref.c')
-rw-r--r--  fs/btrfs/backref.c  138
1 file changed, 103 insertions(+), 35 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index dce3a16996b9..18374a6d05bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -138,6 +138,7 @@ struct share_check {
u64 root_objectid;
u64 inum;
int share_count;
+ bool have_delayed_delete_refs;
};
static inline int extent_is_shared(struct share_check *sc)
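For orientation, a userspace sketch of the updated context and the lifecycle of the new flag as the rest of this diff establishes it (u64 is a stand-in for the kernel type; this is not the full kernel definition):

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t u64;	/* stand-in for the kernel's u64 */

/* Sketch of the updated context, field order as in the hunk above. */
struct share_check {
	u64 root_objectid;
	u64 inum;
	int share_count;
	/*
	 * Set by add_delayed_refs() when it finds a delayed DROP (delete)
	 * reference for another inode, checked by add_inline_refs() and
	 * add_keyed_refs() to suppress the early BACKREF_FOUND_SHARED exit,
	 * and reset per extent buffer in btrfs_is_data_extent_shared().
	 */
	bool have_delayed_delete_refs;
};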
@@ -288,8 +289,10 @@ static void prelim_release(struct preftree *preftree)
struct prelim_ref *ref, *next_ref;
rbtree_postorder_for_each_entry_safe(ref, next_ref,
- &preftree->root.rb_root, rbnode)
+ &preftree->root.rb_root, rbnode) {
+ free_inode_elem_list(ref->inode_list);
free_pref(ref);
+ }
preftree->root = RB_ROOT_CACHED;
preftree->count = 0;
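The leak closed by this hunk: each prelim_ref may own a singly linked inode list, and free_pref() releases only the ref itself. As a reference point, a minimal userspace model of the walk free_inode_elem_list() has to perform (types simplified, free() standing in for kfree()):

#include <stdlib.h>

/* Simplified stand-in for struct extent_inode_elem: a singly linked
 * list hanging off each prelim_ref's inode_list pointer. */
struct extent_inode_elem {
	unsigned long long inum;
	struct extent_inode_elem *next;
};

/* Walk and free every element; releasing the owning ref without this
 * walk leaks the whole chain. */
static void free_inode_elem_list(struct extent_inode_elem *eie)
{
	struct extent_inode_elem *eie_next;

	for (; eie; eie = eie_next) {
		eie_next = eie->next;
		free(eie);
	}
}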
@@ -647,6 +650,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
return (struct extent_inode_elem *)(uintptr_t)node->aux;
}
+static void free_leaf_list(struct ulist *ulist)
+{
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((node = ulist_next(ulist, &uiter)))
+ free_inode_elem_list(unode_aux_to_inode_list(node));
+
+ ulist_free(ulist);
+}
+
/*
* We maintain three separate rbtrees: one for direct refs, one for
* indirect refs which have a key, and one for indirect refs which do not
@@ -761,7 +776,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
cond_resched();
}
out:
- ulist_free(parents);
+ /*
+ * We may have inode lists attached to refs in the parents ulist, so we
+ * must free them before freeing the ulist and its refs.
+ */
+ free_leaf_list(parents);
return ret;
}
@@ -820,16 +839,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct preftrees *preftrees, struct share_check *sc)
{
struct btrfs_delayed_ref_node *node;
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct btrfs_key key;
- struct btrfs_key tmp_op_key;
struct rb_node *n;
int count;
int ret = 0;
- if (extent_op && extent_op->update_key)
- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
-
spin_lock(&head->lock);
for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
@@ -855,10 +869,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
case BTRFS_TREE_BLOCK_REF_KEY: {
/* NORMAL INDIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
+ struct btrfs_key *key_ptr = NULL;
+
+ if (head->extent_op && head->extent_op->update_key) {
+ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
+ key_ptr = &key;
+ }
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_indirect_ref(fs_info, preftrees, ref->root,
- &tmp_op_key, ref->level + 1,
+ key_ptr, ref->level + 1,
node->bytenr, count, sc,
GFP_ATOMIC);
break;
@@ -884,13 +904,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
key.offset = ref->offset;
/*
- * Found a inum that doesn't match our known inum, we
- * know it's shared.
+ * If we have a share check context and a reference for
+ * another inode, we can't exit immediately. This is
+ * because even if this is a BTRFS_ADD_DELAYED_REF
+ * reference we may find next a BTRFS_DROP_DELAYED_REF
+ * which cancels out this ADD reference.
+ *
+ * If this is a DROP reference and there was no previous
+ * ADD reference, then we need to signal that when we
+ * process references from the extent tree (through
+ * add_inline_refs() and add_keyed_refs()), we should
+ * not exit early if we find a reference for another
+ * inode, because one of the delayed DROP references
+ * may cancel that reference in the extent tree.
*/
- if (sc && sc->inum && ref->objectid != sc->inum) {
- ret = BACKREF_FOUND_SHARED;
- goto out;
- }
+ if (sc && count < 0)
+ sc->have_delayed_delete_refs = true;
ret = add_indirect_ref(fs_info, preftrees, ref->root,
&key, 0, node->bytenr, count, sc,
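Why the early exit had to go: delayed refs are net-counted, and the sign convention implied by the count < 0 test is positive for BTRFS_ADD_DELAYED_REF and negative for BTRFS_DROP_DELAYED_REF. A small userspace model of the cancellation the comment describes:

#include <stdio.h>

/* Simplified model of a delayed ref: ADD contributes +ref_mod,
 * DROP contributes -ref_mod (hence the "count < 0" test above). */
enum { ADD_REF, DROP_REF };
struct delayed_ref { int action; int ref_mod; };

static int net_ref_mod(const struct delayed_ref *refs, int n)
{
	int net = 0;

	for (int i = 0; i < n; i++)
		net += (refs[i].action == ADD_REF) ? refs[i].ref_mod
						   : -refs[i].ref_mod;
	return net;
}

int main(void)
{
	/* An ADD for a foreign inode followed by a DROP of the same
	 * reference nets out to 0: the extent is not actually shared,
	 * so returning BACKREF_FOUND_SHARED on the ADD alone would be
	 * a false positive. */
	struct delayed_ref refs[] = { { ADD_REF, 1 }, { DROP_REF, 1 } };

	printf("net ref_mod = %d\n", net_ref_mod(refs, 2));
	return 0;
}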
@@ -920,7 +949,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
if (!ret)
ret = extent_is_shared(sc);
-out:
+
spin_unlock(&head->lock);
return ret;
}
@@ -1023,7 +1052,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1033,6 +1063,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
ret = add_indirect_ref(fs_info, preftrees, root,
&key, 0, bytenr, count,
sc, GFP_NOFS);
+
break;
}
default:
@@ -1122,7 +1153,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1354,6 +1386,12 @@ again:
if (ret < 0)
goto out;
ref->inode_list = eie;
+ /*
+ * We transferred the list ownership to the ref,
+ * so set to NULL to avoid a double free in case
+ * an error happens after this.
+ */
+ eie = NULL;
}
ret = ulist_add_merge_ptr(refs, ref->parent,
ref->inode_list,
@@ -1379,6 +1417,14 @@ again:
eie->next = ref->inode_list;
}
eie = NULL;
+ /*
+ * We have transferred the inode list ownership from
+ * this ref to the ref we added to the 'refs' ulist.
+ * So set this ref's inode list to NULL to avoid
+ * use-after-free when our caller uses it or double
+ * frees in case an error happens before we return.
+ */
+ ref->inode_list = NULL;
}
cond_resched();
}
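Both new comments describe the same idiom: once the inode list is linked into its new owner, the old pointer must be cleared so every path frees the list exactly once. A minimal userspace illustration (hypothetical transfer_list() helper, simplified types):

#include <stddef.h>

struct elem { struct elem *next; };
struct pref { struct elem *inode_list; };

/* Hand the list over and clear the source pointer, mirroring the
 * "eie = NULL" and "ref->inode_list = NULL" assignments above: after
 * the transfer exactly one pointer refers to the list, so no later
 * error path (or caller) can double free it or use it after free. */
static void transfer_list(struct elem **src, struct pref *dst)
{
	dst->inode_list = *src;
	*src = NULL;
}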
@@ -1395,24 +1441,6 @@ out:
return ret;
}
-static void free_leaf_list(struct ulist *blocks)
-{
- struct ulist_node *node = NULL;
- struct extent_inode_elem *eie;
- struct ulist_iterator uiter;
-
- ULIST_ITER_INIT(&uiter);
- while ((node = ulist_next(blocks, &uiter))) {
- if (!node->aux)
- continue;
- eie = unode_aux_to_inode_list(node);
- free_inode_elem_list(eie);
- node->aux = 0;
- }
-
- ulist_free(blocks);
-}
-
/*
* Finds all leafs with a reference to the specified combination of bytenr and
* offset. key_list_head will point to a list of corresponding keys (caller must
@@ -1522,6 +1550,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache
{
struct btrfs_backref_shared_cache_entry *entry;
+ if (!cache->use_cache)
+ return false;
+
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return false;
@@ -1557,6 +1588,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache
return false;
*is_shared = entry->is_shared;
+ /*
+ * If the node at this level is shared, then all nodes below are also
+ * shared. Currently some of the nodes below may be marked as not shared
+ * because we have just switched from one leaf to another, and switched
+ * also other nodes above the leaf and below the current level, so mark
+ * them as shared.
+ */
+ if (*is_shared) {
+ for (int i = 0; i < level; i++) {
+ cache->entries[i].is_shared = true;
+ cache->entries[i].gen = entry->gen;
+ }
+ }
return true;
}
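A userspace model of the propagation added above: the cache keeps one entry per tree level for the current path, so a shared verdict at some level implies every level below it on that path is shared too and must carry the same generation (MAX_LEVEL and the entry fields are simplified stand-ins):

#include <stdbool.h>

#define MAX_LEVEL 8	/* stand-in for BTRFS_MAX_LEVEL */

struct cache_entry {
	unsigned long long gen;
	bool is_shared;
};

/* What the loop added above does: if the entry at `level` is shared,
 * stamp all lower levels as shared with the same generation, since
 * they may still carry stale "not shared" results from a previous
 * leaf on a different path. */
static void mark_lower_levels_shared(struct cache_entry *entries, int level)
{
	for (int i = 0; i < level; i++) {
		entries[i].is_shared = true;
		entries[i].gen = entries[level].gen;
	}
}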
@@ -1573,6 +1617,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
struct btrfs_backref_shared_cache_entry *entry;
u64 gen;
+ if (!cache->use_cache)
+ return;
+
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return;
@@ -1648,6 +1695,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
.root_objectid = root->root_key.objectid,
.inum = inum,
.share_count = 0,
+ .have_delayed_delete_refs = false,
};
int level;
@@ -1669,6 +1717,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
/* -1 means we are in the bytenr of the data extent. */
level = -1;
ULIST_ITER_INIT(&uiter);
+ cache->use_cache = true;
while (1) {
bool is_shared;
bool cached;
@@ -1698,6 +1747,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
extent_gen > btrfs_root_last_snapshot(&root->root_item))
break;
+ /*
+ * If our data extent was not directly shared (without multiple
+ * reference items), then it might have a single reference item
+ * with a count > 1 for the same offset, which means there are 2
+ * (or more) file extent items that point to the data extent -
+ * this happens when a file extent item needs to be split and
+ * then one item gets moved to another leaf due to a b+tree leaf
+ * split when inserting some item. In this case the file extent
+ * items may be located in different leaves and therefore some
+ * of the leaves may be referenced through shared subtrees while
+ * others are not. Since our extent buffer cache only works for
+ * a single path (by far the most common case and simpler to
+ * deal with), we cannot use it if we have multiple leaves
+ * (which implies multiple paths).
+ */
+ if (level == -1 && tmp->nnodes > 1)
+ cache->use_cache = false;
+
if (level >= 0)
store_backref_shared_cache(cache, root, bytenr,
level, false);
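The new guard encodes the limitation the comment spells out: the per-level cache describes exactly one path from the data extent to the root, so more than one leaf means more than one path and the cache must be bypassed. A compact sketch of that decision (nnodes stands in for tmp->nnodes, fields simplified):

#include <stdbool.h>

struct shared_cache { bool use_cache; };

/* level == -1 means we are at the data extent itself; finding more
 * than one node there means multiple file extent items in different
 * leaves, hence multiple paths, and the single-path cache can no
 * longer give trustworthy answers. */
static void maybe_disable_cache(struct shared_cache *cache, int level,
				unsigned long nnodes)
{
	if (level == -1 && nnodes > 1)
		cache->use_cache = false;
}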
@@ -1713,6 +1780,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
break;
}
shared.share_count = 0;
+ shared.have_delayed_delete_refs = false;
cond_resched();
}