summaryrefslogtreecommitdiff
path: root/fs/bcachefs/snapshot.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/snapshot.c')
-rw-r--r--fs/bcachefs/snapshot.c1137
1 files changed, 636 insertions, 501 deletions
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 544322d5c251..00d62d1190ef 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bbpos.h"
#include "bkey_buf.h"
+#include "btree_cache.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
+#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@@ -31,15 +34,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
le32_to_cpu(t.v->root_snapshot));
}
-int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k,
- enum bkey_invalid_flags flags,
- struct printbuf *err)
+int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k,
+ struct bkey_validate_context from)
{
int ret = 0;
bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
- bkey_lt(k.k->p, POS(0, 1)), c, err,
- snapshot_tree_pos_bad,
+ bkey_lt(k.k->p, POS(0, 1)),
+ c, snapshot_tree_pos_bad,
"bad pos");
fsck_err:
return ret;
@@ -49,7 +51,7 @@ int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot_tree *s)
{
int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
- BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
+ BTREE_ITER_with_updates, snapshot_tree, s);
if (bch2_err_matches(ret, ENOENT))
ret = -BCH_ERR_ENOENT_snapshot_tree;
@@ -141,13 +143,14 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
- while (id && id < ancestor - IS_ANCESTOR_BITMAP)
- id = get_ancestor_below(t, id, ancestor);
+ if (likely(ancestor >= IS_ANCESTOR_BITMAP))
+ while (id && id < ancestor - IS_ANCESTOR_BITMAP)
+ id = get_ancestor_below(t, id, ancestor);
ret = id && id < ancestor
? test_ancestor_bitmap(t, id, ancestor)
@@ -168,6 +171,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
+ if (unlikely(new_bytes > INT_MAX))
+ return NULL;
+
new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
@@ -205,9 +211,14 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
{
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
- prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
- BCH_SNAPSHOT_SUBVOL(s.v),
- BCH_SNAPSHOT_DELETED(s.v),
+ if (BCH_SNAPSHOT_SUBVOL(s.v))
+ prt_str(out, "subvol ");
+ if (BCH_SNAPSHOT_WILL_DELETE(s.v))
+ prt_str(out, "will_delete ");
+ if (BCH_SNAPSHOT_DELETED(s.v))
+ prt_str(out, "deleted ");
+
+ prt_printf(out, "parent %10u children %10u %10u subvol %u tree %u",
le32_to_cpu(s.v->parent),
le32_to_cpu(s.v->children[0]),
le32_to_cpu(s.v->children[1]),
@@ -222,55 +233,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
le32_to_cpu(s.v->skip[2]));
}
-int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k,
- enum bkey_invalid_flags flags,
- struct printbuf *err)
+int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k,
+ struct bkey_validate_context from)
{
struct bkey_s_c_snapshot s;
u32 i, id;
int ret = 0;
bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
- bkey_lt(k.k->p, POS(0, 1)), c, err,
- snapshot_pos_bad,
+ bkey_lt(k.k->p, POS(0, 1)),
+ c, snapshot_pos_bad,
"bad pos");
s = bkey_s_c_to_snapshot(k);
id = le32_to_cpu(s.v->parent);
- bkey_fsck_err_on(id && id <= k.k->p.offset, c, err,
- snapshot_parent_bad,
+ bkey_fsck_err_on(id && id <= k.k->p.offset,
+ c, snapshot_parent_bad,
"bad parent node (%u <= %llu)",
id, k.k->p.offset);
- bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err,
- snapshot_children_not_normalized,
+ bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]),
+ c, snapshot_children_not_normalized,
"children not normalized");
- bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err,
- snapshot_child_duplicate,
+ bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1],
+ c, snapshot_child_duplicate,
"duplicate child nodes");
for (i = 0; i < 2; i++) {
id = le32_to_cpu(s.v->children[i]);
- bkey_fsck_err_on(id >= k.k->p.offset, c, err,
- snapshot_child_bad,
+ bkey_fsck_err_on(id >= k.k->p.offset,
+ c, snapshot_child_bad,
"bad child node (%u >= %llu)",
id, k.k->p.offset);
}
if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
- le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err,
- snapshot_skiplist_not_normalized,
+ le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]),
+ c, snapshot_skiplist_not_normalized,
"skiplist not normalized");
for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
id = le32_to_cpu(s.v->skip[i]);
- bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err,
- snapshot_skiplist_bad,
+ bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent),
+ c, snapshot_skiplist_bad,
"bad skiplist node %u", id);
}
}
@@ -278,27 +288,20 @@ fsck_err:
return ret;
}
-static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
-{
- struct snapshot_t *t = snapshot_t_mut(c, id);
- u32 parent = id;
-
- while ((parent = bch2_snapshot_parent_early(c, parent)) &&
- parent - id - 1 < IS_ANCESTOR_BITMAP)
- __set_bit(parent - id - 1, t->is_ancestor);
-}
-
-static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
+static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
{
mutex_lock(&c->snapshot_table_lock);
- __set_is_ancestor_bitmap(c, id);
+ int ret = snapshot_t_mut(c, id)
+ ? 0
+ : -BCH_ERR_ENOMEM_mark_snapshot;
mutex_unlock(&c->snapshot_table_lock);
+ return ret;
}
static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+ enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
struct snapshot_t *t;
@@ -316,6 +319,9 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
+ t->state = !BCH_SNAPSHOT_DELETED(s.v)
+ ? SNAPSHOT_ID_live
+ : SNAPSHOT_ID_deleted;
t->parent = le32_to_cpu(s.v->parent);
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
@@ -334,11 +340,15 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
t->skip[2] = 0;
}
- __set_is_ancestor_bitmap(c, id);
+ u32 parent = id;
+
+ while ((parent = bch2_snapshot_parent_early(c, parent)) &&
+ parent - id - 1 < IS_ANCESTOR_BITMAP)
+ __set_bit(parent - id - 1, t->is_ancestor);
- if (BCH_SNAPSHOT_DELETED(s.v)) {
+ if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
+ if (c->recovery.pass_done > BCH_RECOVERY_PASS_delete_dead_snapshots)
bch2_delete_dead_snapshots_async(c);
}
} else {
@@ -352,7 +362,7 @@ err:
int bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s new,
- unsigned flags)
+ enum btree_iter_update_trigger_flags flags)
{
return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
}
@@ -361,71 +371,7 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s)
{
return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
- BTREE_ITER_WITH_UPDATES, snapshot, s);
-}
-
-static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
-{
- struct bch_snapshot v;
- int ret;
-
- if (!id)
- return 0;
-
- ret = bch2_snapshot_lookup(trans, id, &v);
- if (bch2_err_matches(ret, ENOENT))
- bch_err(trans->c, "snapshot node %u not found", id);
- if (ret)
- return ret;
-
- return !BCH_SNAPSHOT_DELETED(&v);
-}
-
-/*
- * If @k is a snapshot with just one live child, it's part of a linear chain,
- * which we consider to be an equivalence class: and then after snapshot
- * deletion cleanup, there should only be a single key at a given position in
- * this equivalence class.
- *
- * This sets the equivalence class of @k to be the child's equivalence class, if
- * it's part of such a linear chain: this correctly sets equivalence classes on
- * startup if we run leaf to root (i.e. in natural key order).
- */
-static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
-{
- struct bch_fs *c = trans->c;
- unsigned i, nr_live = 0, live_idx = 0;
- struct bkey_s_c_snapshot snap;
- u32 id = k.k->p.offset, child[2];
-
- if (k.k->type != KEY_TYPE_snapshot)
- return 0;
-
- snap = bkey_s_c_to_snapshot(k);
-
- child[0] = le32_to_cpu(snap.v->children[0]);
- child[1] = le32_to_cpu(snap.v->children[1]);
-
- for (i = 0; i < 2; i++) {
- int ret = bch2_snapshot_live(trans, child[i]);
-
- if (ret < 0)
- return ret;
-
- if (ret)
- live_idx = i;
- nr_live += ret;
- }
-
- mutex_lock(&c->snapshot_table_lock);
-
- snapshot_t_mut(c, id)->equiv = nr_live == 1
- ? snapshot_t_mut(c, child[live_idx])->equiv
- : id;
-
- mutex_unlock(&c->snapshot_table_lock);
-
- return 0;
+ BTREE_ITER_with_updates, snapshot, s);
}
/* fsck: */
@@ -463,18 +409,29 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
return 0;
}
-static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
+u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
+ snapshot_id_list *skip)
{
- u32 id = snapshot_root;
- u32 subvol = 0, s;
-
- while (id) {
- s = snapshot_t(c, id)->subvol;
-
- if (s && (!subvol || s < subvol))
- subvol = s;
+ u32 id, subvol = 0, s;
+retry:
+ id = snapshot_root;
+ rcu_read_lock();
+ while (id && bch2_snapshot_exists(c, id)) {
+ if (!(skip && snapshot_list_has_id(skip, id))) {
+ s = snapshot_t(c, id)->subvol;
+ if (s && (!subvol || s < subvol))
+ subvol = s;
+ }
id = bch2_snapshot_tree_next(c, id);
+ if (id == snapshot_root)
+ break;
+ }
+ rcu_read_unlock();
+
+ if (!subvol && skip) {
+ skip = NULL;
+ goto retry;
}
return subvol;
@@ -503,13 +460,12 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
break;
}
}
-
bch2_trans_iter_exit(trans, &iter);
if (!ret && !found) {
struct bkey_i_subvolume *u;
- *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
+ *subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL);
u = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, *subvol_id),
@@ -533,6 +489,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
struct bch_snapshot s;
struct bch_subvolume subvol;
struct printbuf buf = PRINTBUF;
+ struct btree_iter snapshot_iter = {};
u32 root_id;
int ret;
@@ -542,40 +499,53 @@ static int check_snapshot_tree(struct btree_trans *trans,
st = bkey_s_c_to_snapshot_tree(k);
root_id = le32_to_cpu(st.v->root_snapshot);
- ret = bch2_snapshot_lookup(trans, root_id, &s);
+ struct bkey_s_c_snapshot snapshot_k =
+ bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots,
+ POS(0, root_id), 0, snapshot);
+ ret = bkey_err(snapshot_k);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
+ if (!ret)
+ bkey_val_copy(&s, snapshot_k);
+
if (fsck_err_on(ret ||
root_id != bch2_snapshot_root(c, root_id) ||
st.k->p.offset != le32_to_cpu(s.tree),
- c, snapshot_tree_to_missing_snapshot,
- "snapshot tree points to missing/incorrect snapshot:\n %s",
- (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+ trans, snapshot_tree_to_missing_snapshot,
+ "snapshot tree points to missing/incorrect snapshot:\n%s",
+ (bch2_bkey_val_to_text(&buf, c, st.s_c),
+ prt_newline(&buf),
+ ret
+ ? prt_printf(&buf, "(%s)", bch2_err_str(ret))
+ : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c),
+ buf.buf))) {
ret = bch2_btree_delete_at(trans, iter, 0);
goto err;
}
- ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
- false, 0, &subvol);
+ if (!st.v->master_subvol)
+ goto out;
+
+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
if (fsck_err_on(ret,
- c, snapshot_tree_to_missing_subvol,
- "snapshot tree points to missing subvolume:\n %s",
+ trans, snapshot_tree_to_missing_subvol,
+ "snapshot tree points to missing subvolume:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(!bch2_snapshot_is_ancestor(c,
le32_to_cpu(subvol.snapshot),
root_id),
- c, snapshot_tree_to_wrong_subvol,
- "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
+ trans, snapshot_tree_to_wrong_subvol,
+ "snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
- c, snapshot_tree_to_snapshot_subvol,
- "snapshot tree points to snapshot subvolume:\n %s",
+ trans, snapshot_tree_to_snapshot_subvol,
+ "snapshot tree points to snapshot subvolume:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
struct bkey_i_snapshot_tree *u;
@@ -600,8 +570,10 @@ static int check_snapshot_tree(struct btree_trans *trans,
u->v.master_subvol = cpu_to_le32(subvol_id);
st = snapshot_tree_i_to_s_c(u);
}
+out:
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &snapshot_iter);
printbuf_exit(&buf);
return ret;
}
@@ -618,7 +590,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c)
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter,
BTREE_ID_snapshot_trees, POS_MIN,
- BTREE_ITER_PREFETCH, k,
+ BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_snapshot_tree(trans, &iter, k)));
bch_err_fn(c, ret);
@@ -695,7 +667,7 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
root = bch2_bkey_get_iter_typed(trans, &root_iter,
BTREE_ID_snapshots, POS(0, root_id),
- BTREE_ITER_WITH_UPDATES, snapshot);
+ BTREE_ITER_with_updates, snapshot);
ret = bkey_err(root);
if (ret)
goto err;
@@ -710,7 +682,7 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u) ?:
bch2_snapshot_tree_create(trans, root_id,
- bch2_snapshot_tree_oldest_subvol(c, root_id),
+ bch2_snapshot_oldest_subvol(c, root_id, NULL),
&tree_id);
if (ret)
goto err;
@@ -755,6 +727,9 @@ static int check_snapshot(struct btree_trans *trans,
memset(&s, 0, sizeof(s));
memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
+ if (BCH_SNAPSHOT_DELETED(&s))
+ return 0;
+
id = le32_to_cpu(s.parent);
if (id) {
ret = bch2_snapshot_lookup(trans, id, &v);
@@ -792,11 +767,11 @@ static int check_snapshot(struct btree_trans *trans,
}
bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
- !BCH_SNAPSHOT_DELETED(&s);
+ !BCH_SNAPSHOT_WILL_DELETE(&s);
if (should_have_subvol) {
id = le32_to_cpu(s.subvol);
- ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
+ ret = bch2_subvolume_get(trans, id, false, &subvol);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -811,8 +786,8 @@ static int check_snapshot(struct btree_trans *trans,
}
} else {
if (fsck_err_on(s.subvol,
- c, snapshot_should_not_have_subvol,
- "snapshot should not point to subvol:\n %s",
+ trans, snapshot_should_not_have_subvol,
+ "snapshot should not point to subvol:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
@@ -828,8 +803,9 @@ static int check_snapshot(struct btree_trans *trans,
if (ret < 0)
goto err;
- if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree,
- "snapshot points to missing/incorrect tree:\n %s",
+ if (fsck_err_on(!ret,
+ trans, snapshot_to_bad_snapshot_tree,
+ "snapshot points to missing/incorrect tree:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
if (ret)
@@ -840,8 +816,8 @@ static int check_snapshot(struct btree_trans *trans,
real_depth = bch2_snapshot_depth(c, parent_id);
if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
- c, snapshot_bad_depth,
- "snapshot with incorrect depth field, should be %u:\n %s",
+ trans, snapshot_bad_depth,
+ "snapshot with incorrect depth field, should be %u:\n%s",
real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
@@ -856,8 +832,9 @@ static int check_snapshot(struct btree_trans *trans,
if (ret < 0)
goto err;
- if (fsck_err_on(!ret, c, snapshot_bad_skiplist,
- "snapshot with bad skiplist field:\n %s",
+ if (fsck_err_on(!ret,
+ trans, snapshot_bad_skiplist,
+ "snapshot with bad skiplist field:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
@@ -886,7 +863,7 @@ int bch2_check_snapshots(struct bch_fs *c)
int ret = bch2_trans_run(c,
for_each_btree_key_reverse_commit(trans, iter,
BTREE_ID_snapshots, POS_MAX,
- BTREE_ITER_PREFETCH, k,
+ BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_snapshot(trans, &iter, k)));
bch_err_fn(c, ret);
@@ -897,14 +874,30 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
- if (bch2_snapshot_equiv(c, id))
- return 0;
+ /* Do we need to reconstruct the snapshot_tree entry as well? */
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+ u32 tree_id = 0;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN,
+ 0, k, ret) {
+ if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
+ tree_id = k.k->p.offset;
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
- u32 tree_id;
- int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
if (ret)
return ret;
+ if (!tree_id) {
+ ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+ if (ret)
+ return ret;
+ }
+
struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
ret = PTR_ERR_OR_ZERO(snapshot);
if (ret)
@@ -915,10 +908,18 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
snapshot->v.tree = cpu_to_le32(tree_id);
snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c));
- return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
- bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
- bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
- bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
+ 0, k, ret) {
+ if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
+ snapshot->v.subvol = cpu_to_le32(k.k->p.offset);
+ SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true);
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return bch2_snapshot_table_make_room(c, id) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0);
}
/* Figure out which snapshot nodes belong in the same tree: */
@@ -1001,7 +1002,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
r.btree = btree;
ret = for_each_btree_key(trans, iter, btree, POS_MIN,
- BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
+ BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({
get_snapshot_trees(c, &r, k.k->p);
}));
if (ret)
@@ -1016,9 +1017,9 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
snapshot_id_list_to_text(&buf, t);
darray_for_each(*t, id) {
- if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
- c, snapshot_node_missing,
- "snapshot node %u from tree %s missing", *id, buf.buf)) {
+ if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty,
+ trans, snapshot_node_missing,
+ "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
if (t->nr > 1) {
bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
ret = -BCH_ERR_fsck_repair_unimplemented;
@@ -1041,19 +1042,54 @@ err:
return ret;
}
+int __bch2_check_key_has_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+ enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);
+
+ /* Snapshot was definitively deleted, this error is marked autofix */
+ if (fsck_err_on(state == SNAPSHOT_ID_deleted,
+ trans, bkey_in_deleted_snapshot,
+ "key in deleted snapshot %s, delete?",
+ (bch2_btree_id_to_text(&buf, iter->btree_id),
+ prt_char(&buf, ' '),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node) ?: 1;
+
+ /*
+ * Snapshot missing: we should have caught this with btree_lost_data and
+ * kicked off reconstruct_snapshots, so if we end up here we have no
+ * idea what happened:
+ */
+ if (fsck_err_on(state == SNAPSHOT_ID_empty,
+ trans, bkey_in_missing_snapshot,
+ "key in missing snapshot %s, delete?",
+ (bch2_btree_id_to_text(&buf, iter->btree_id),
+ prt_char(&buf, ' '),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node) ?: 1;
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
/*
* Mark a snapshot as deleted, for future cleanup:
*/
int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
{
struct btree_iter iter;
- struct bkey_i_snapshot *s;
- int ret = 0;
-
- s = bch2_bkey_get_mut_typed(trans, &iter,
+ struct bkey_i_snapshot *s =
+ bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshots, POS(0, id),
0, snapshot);
- ret = PTR_ERR_OR_ZERO(s);
+ int ret = PTR_ERR_OR_ZERO(s);
if (unlikely(ret)) {
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
trans->c, "missing snapshot %u", id);
@@ -1061,10 +1097,10 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
}
/* already deleted? */
- if (BCH_SNAPSHOT_DELETED(&s->v))
+ if (BCH_SNAPSHOT_WILL_DELETE(&s->v))
goto err;
- SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+ SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true);
SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
s->v.subvol = 0;
err:
@@ -1081,27 +1117,28 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
- struct btree_iter c_iter = (struct btree_iter) { NULL };
- struct btree_iter tree_iter = (struct btree_iter) { NULL };
- struct bkey_s_c_snapshot s;
+ struct btree_iter iter, p_iter = {};
+ struct btree_iter c_iter = {};
+ struct btree_iter tree_iter = {};
u32 parent_id, child_id;
unsigned i;
int ret = 0;
- s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
- BTREE_ITER_INTENT, snapshot);
- ret = bkey_err(s);
+ struct bkey_i_snapshot *s =
+ bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
+ BTREE_ITER_intent, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
"missing snapshot %u", id);
if (ret)
goto err;
- BUG_ON(s.v->children[1]);
+ BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
+ BUG_ON(s->v.children[1]);
- parent_id = le32_to_cpu(s.v->parent);
- child_id = le32_to_cpu(s.v->children[0]);
+ parent_id = le32_to_cpu(s->v.parent);
+ child_id = le32_to_cpu(s->v.children[0]);
if (parent_id) {
struct bkey_i_snapshot *parent;
@@ -1159,24 +1196,38 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
*/
struct bkey_i_snapshot_tree *s_t;
- BUG_ON(s.v->children[1]);
+ BUG_ON(s->v.children[1]);
s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
- BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
+ BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(s_t);
if (ret)
goto err;
- if (s.v->children[0]) {
- s_t->v.root_snapshot = s.v->children[0];
+ if (s->v.children[0]) {
+ s_t->v.root_snapshot = s->v.children[0];
} else {
s_t->k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&s_t->k, 0);
}
}
- ret = bch2_btree_delete_at(trans, &iter, 0);
+ if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) {
+ SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+ s->v.parent = 0;
+ s->v.children[0] = 0;
+ s->v.children[1] = 0;
+ s->v.subvol = 0;
+ s->v.tree = 0;
+ s->v.depth = 0;
+ s->v.skip[0] = 0;
+ s->v.skip[1] = 0;
+ s->v.skip[2] = 0;
+ } else {
+ s->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&s->k, 0);
+ }
err:
bch2_trans_iter_exit(trans, &tree_iter);
bch2_trans_iter_exit(trans, &p_iter);
@@ -1199,14 +1250,14 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
- POS_MIN, BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek(&iter);
+ POS_MIN, BTREE_ITER_intent);
+ k = bch2_btree_iter_peek(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
for (i = 0; i < nr_snapids; i++) {
- k = bch2_btree_iter_prev_slot(&iter);
+ k = bch2_btree_iter_prev_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@@ -1241,10 +1292,6 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
goto err;
new_snapids[i] = iter.pos.offset;
-
- mutex_lock(&c->snapshot_table_lock);
- snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
- mutex_unlock(&c->snapshot_table_lock);
}
err:
bch2_trans_iter_exit(trans, &iter);
@@ -1350,70 +1397,152 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it
*/
-static int snapshot_delete_key(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k,
- snapshot_id_list *deleted,
- snapshot_id_list *equiv_seen,
- struct bpos *last_pos)
+static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
- struct bch_fs *c = trans->c;
- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+ darray_for_each(*l, i)
+ if (i->id == id)
+ return i->live_child;
+ return 0;
+}
+
+static unsigned __live_child(struct snapshot_table *t, u32 id,
+ snapshot_id_list *delete_leaves,
+ interior_delete_list *delete_interior)
+{
+ struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return 0;
- if (!bkey_eq(k.k->p, *last_pos))
- equiv_seen->nr = 0;
- *last_pos = k.k->p;
+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++)
+ if (s->children[i] &&
+ !snapshot_list_has_id(delete_leaves, s->children[i]) &&
+ !interior_delete_has_id(delete_interior, s->children[i]))
+ return s->children[i];
- if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
- snapshot_list_has_id(equiv_seen, equiv)) {
- return bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
- } else {
- return snapshot_list_add(c, equiv_seen, equiv);
+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
+ u32 live_child = s->children[i]
+ ? __live_child(t, s->children[i], delete_leaves, delete_interior)
+ : 0;
+ if (live_child)
+ return live_child;
}
+
+ return 0;
}
-static int move_key_to_correct_snapshot(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k)
+static unsigned live_child(struct bch_fs *c, u32 id)
{
- struct bch_fs *c = trans->c;
- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+ struct snapshot_delete *d = &c->snapshot_delete;
- /*
- * When we have a linear chain of snapshot nodes, we consider
- * those to form an equivalence class: we're going to collapse
- * them all down to a single node, and keep the leaf-most node -
- * which has the same id as the equivalence class id.
- *
- * If there are multiple keys in different snapshots at the same
- * position, we're only going to keep the one in the newest
- * snapshot - the rest have been overwritten and are redundant,
- * and for the key we're going to keep we need to move it to the
- * equivalance class ID if it's not there already.
- */
- if (equiv != k.k->p.snapshot) {
- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
- struct btree_iter new_iter;
- int ret;
+ rcu_read_lock();
+ u32 ret = __live_child(rcu_dereference(c->snapshots), id,
+ &d->delete_leaves, &d->delete_interior);
+ rcu_read_unlock();
+ return ret;
+}
- ret = PTR_ERR_OR_ZERO(new);
+static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
+{
+ return snapshot_list_has_id(&d->delete_leaves, id) ||
+ interior_delete_has_id(&d->delete_interior, id) != 0;
+}
+
+static int delete_dead_snapshots_process_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct snapshot_delete *d = &trans->c->snapshot_delete;
+
+ if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
+ return bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node);
+
+ u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot);
+ if (live_child) {
+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
+ int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
- new->k.p.snapshot = equiv;
+ new->k.p.snapshot = live_child;
- bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
- BTREE_ITER_ALL_SNAPSHOTS|
- BTREE_ITER_CACHED|
- BTREE_ITER_INTENT);
+ struct btree_iter dst_iter;
+ struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter,
+ iter->btree_id, new->k.p,
+ BTREE_ITER_all_snapshots|
+ BTREE_ITER_intent);
+ ret = bkey_err(dst_k);
+ if (ret)
+ return ret;
- ret = bch2_btree_iter_traverse(&new_iter) ?:
- bch2_trans_update(trans, &new_iter, new,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ ret = (bkey_deleted(dst_k.k)
+ ? bch2_trans_update(trans, &dst_iter, new,
+ BTREE_UPDATE_internal_snapshot_node)
+ : 0) ?:
bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
- bch2_trans_iter_exit(trans, &new_iter);
+ BTREE_UPDATE_internal_snapshot_node);
+ bch2_trans_iter_exit(trans, &dst_iter);
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ u64 inum = iter->btree_id != BTREE_ID_inodes
+ ? iter->pos.inode
+ : iter->pos.offset;
+
+ if (*prev_inum == inum)
+ return false;
+
+ *prev_inum = inum;
+
+ bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
+ bch2_snapshot_tree(c, iter->pos.snapshot));
+ if (unlikely(ret)) {
+ struct bpos pos = iter->pos;
+ pos.snapshot = 0;
+ if (iter->btree_id != BTREE_ID_inodes)
+ pos.offset = U64_MAX;
+ bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos));
+ }
+
+ return ret;
+}
+
+static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
+ struct disk_reservation res = { 0 };
+ u64 prev_inum = 0;
+
+ d->pos.pos = POS_MIN;
+
+ if (!btree_type_has_snapshots(d->pos.btree))
+ continue;
+
+ int ret = for_each_btree_key_commit(trans, iter,
+ d->pos.btree, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+
+ bch2_disk_reservation_put(c, &res);
+
if (ret)
return ret;
}
@@ -1421,28 +1550,92 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans,
return 0;
}
-static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
+static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree,
+ struct bpos start, struct bpos end)
{
- struct bkey_s_c_snapshot snap;
- u32 children[2];
- int ret;
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct disk_reservation res = { 0 };
+
+ d->pos.btree = btree;
+ d->pos.pos = POS_MIN;
+
+ int ret = for_each_btree_key_max_commit(trans, iter,
+ btree, start, end,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.pos = iter.pos;
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
- if (k.k->type != KEY_TYPE_snapshot)
- return 0;
+ bch2_disk_reservation_put(c, &res);
+ return ret;
+}
- snap = bkey_s_c_to_snapshot(k);
- if (BCH_SNAPSHOT_DELETED(snap.v) ||
- BCH_SNAPSHOT_SUBVOL(snap.v))
- return 0;
+static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct disk_reservation res = { 0 };
+ u64 prev_inum = 0;
+ int ret = 0;
- children[0] = le32_to_cpu(snap.v->children[0]);
- children[1] = le32_to_cpu(snap.v->children[1]);
+ struct btree_iter iter;
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
- ret = bch2_snapshot_live(trans, children[0]) ?:
- bch2_snapshot_live(trans, children[1]);
- if (ret < 0)
- return ret;
- return !ret;
+ while (1) {
+ struct bkey_s_c k;
+ ret = lockrestart_do(trans,
+ bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
+ if (ret)
+ break;
+
+ if (!k.k)
+ break;
+
+ d->pos.btree = iter.btree_id;
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ if (snapshot_id_dying(d, k.k->p.snapshot)) {
+ struct bpos start = POS(k.k->p.offset, 0);
+ struct bpos end = POS(k.k->p.offset, U64_MAX);
+
+ ret = delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?:
+ delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?:
+ delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end);
+ if (ret)
+ break;
+
+ bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1));
+ } else {
+ bch2_btree_iter_advance(trans, &iter);
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ goto err;
+
+ prev_inum = 0;
+ ret = for_each_btree_key_commit(trans, iter,
+ BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.btree = iter.btree_id;
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+err:
+ bch2_disk_reservation_put(c, &res);
+ return ret;
}
/*
@@ -1450,26 +1643,66 @@ static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c
* it doesn't have child snapshot nodes - it's now redundant and we can mark it
* as deleted.
*/
-static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
+static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
{
- int ret = bch2_snapshot_needs_delete(trans, k);
+ if (k.k->type != KEY_TYPE_snapshot)
+ return 0;
+
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
+ unsigned live_children = 0;
+ int ret = 0;
+
+ if (BCH_SNAPSHOT_SUBVOL(s.v))
+ return 0;
+
+ if (BCH_SNAPSHOT_DELETED(s.v))
+ return 0;
+
+ mutex_lock(&d->progress_lock);
+ for (unsigned i = 0; i < 2; i++) {
+ u32 child = le32_to_cpu(s.v->children[i]);
+
+ live_children += child &&
+ !snapshot_list_has_id(&d->delete_leaves, child);
+ }
+
+ u32 tree = bch2_snapshot_tree(c, s.k->p.offset);
+
+ if (live_children == 0) {
+ ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+ snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
+ } else if (live_children == 1) {
+ struct snapshot_interior_delete n = {
+ .id = s.k->p.offset,
+ .live_child = live_child(c, s.k->p.offset),
+ };
+
+ if (!n.live_child) {
+ bch_err(c, "error finding live child of snapshot %u", n.id);
+ ret = -EINVAL;
+ } else {
+ ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+ darray_push(&d->delete_interior, n);
+ }
+ }
+ mutex_unlock(&d->progress_lock);
- return ret <= 0
- ? ret
- : bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
+ return ret;
}
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
- snapshot_id_list *skip)
+ interior_delete_list *skip)
{
rcu_read_lock();
- while (snapshot_list_has_id(skip, id))
+ while (interior_delete_has_id(skip, id))
id = __bch2_snapshot_parent(c, id);
while (n--) {
do {
id = __bch2_snapshot_parent(c, id);
- } while (snapshot_list_has_id(skip, id));
+ } while (interior_delete_has_id(skip, id));
}
rcu_read_unlock();
@@ -1478,17 +1711,20 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
struct btree_iter *iter, struct bkey_s_c k,
- snapshot_id_list *deleted)
+ interior_delete_list *deleted)
{
struct bch_fs *c = trans->c;
u32 nr_deleted_ancestors = 0;
struct bkey_i_snapshot *s;
int ret;
+ if (!bch2_snapshot_exists(c, k.k->p.offset))
+ return 0;
+
if (k.k->type != KEY_TYPE_snapshot)
return 0;
- if (snapshot_list_has_id(deleted, k.k->p.offset))
+ if (interior_delete_has_id(deleted, k.k->p.offset))
return 0;
s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
@@ -1497,7 +1733,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return ret;
darray_for_each(*deleted, i)
- nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
+ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);
if (!nr_deleted_ancestors)
return 0;
@@ -1515,7 +1751,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
u32 id = le32_to_cpu(s->v.skip[j]);
- if (snapshot_list_has_id(deleted, id)) {
+ if (interior_delete_has_id(deleted, id)) {
id = bch2_snapshot_nth_parent_skip(c,
parent,
depth > 1
@@ -1532,110 +1768,79 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return bch2_trans_update(trans, iter, &s->k_i, 0);
}
-int bch2_delete_dead_snapshots(struct bch_fs *c)
+static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
+{
+ prt_printf(out, "deleting from trees");
+ darray_for_each(d->deleting_from_trees, i)
+ prt_printf(out, " %u", *i);
+
+ prt_printf(out, "deleting leaves");
+ darray_for_each(d->delete_leaves, i)
+ prt_printf(out, " %u", *i);
+ prt_newline(out);
+
+ prt_printf(out, "interior");
+ darray_for_each(d->delete_interior, i)
+ prt_printf(out, " %u->%u", i->id, i->live_child);
+ prt_newline(out);
+}
+
+int __bch2_delete_dead_snapshots(struct bch_fs *c)
{
- struct btree_trans *trans;
- snapshot_id_list deleted = { 0 };
- snapshot_id_list deleted_interior = { 0 };
- u32 id;
+ struct snapshot_delete *d = &c->snapshot_delete;
int ret = 0;
- if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
+ if (!mutex_trylock(&d->lock))
return 0;
- if (!test_bit(BCH_FS_started, &c->flags)) {
- ret = bch2_fs_read_write_early(c);
- bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
- if (ret)
- return ret;
- }
+ if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
+ goto out_unlock;
- trans = bch2_trans_get(c);
+ struct btree_trans *trans = bch2_trans_get(c);
/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k,
- NULL, NULL, 0,
- bch2_delete_redundant_snapshot(trans, k));
- bch_err_msg(c, ret, "deleting redundant snapshots");
- if (ret)
- goto err;
+ d->running = true;
+ d->pos = BBPOS_MIN;
- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k,
- bch2_snapshot_set_equiv(trans, k));
- bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
+ check_should_delete_snapshot(trans, k));
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err;
- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k, ({
- if (k.k->type != KEY_TYPE_snapshot)
- continue;
-
- BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
- ? snapshot_list_add(c, &deleted, k.k->p.offset)
- : 0;
- }));
- bch_err_msg(c, ret, "walking snapshots");
- if (ret)
+ if (!d->delete_leaves.nr && !d->delete_interior.nr)
goto err;
- for (id = 0; id < BTREE_ID_NR; id++) {
- struct bpos last_pos = POS_MIN;
- snapshot_id_list equiv_seen = { 0 };
- struct disk_reservation res = { 0 };
-
- if (!btree_type_has_snapshots(id))
- continue;
-
- /*
- * deleted inodes btree is maintained by a trigger on the inodes
- * btree - no work for us to do here, and it's not safe to scan
- * it because we'll see out of date keys due to the btree write
- * buffer:
- */
- if (id == BTREE_ID_deleted_inodes)
- continue;
-
- ret = for_each_btree_key_commit(trans, iter,
- id, POS_MIN,
- BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
- &res, NULL, BCH_TRANS_COMMIT_no_enospc,
- snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
- for_each_btree_key_commit(trans, iter,
- id, POS_MIN,
- BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
- &res, NULL, BCH_TRANS_COMMIT_no_enospc,
- move_key_to_correct_snapshot(trans, &iter, k));
-
- bch2_disk_reservation_put(c, &res);
- darray_exit(&equiv_seen);
+ {
+ struct printbuf buf = PRINTBUF;
+ bch2_snapshot_delete_nodes_to_text(&buf, d);
- bch_err_msg(c, ret, "deleting keys from dying snapshots");
+ ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
+ printbuf_exit(&buf);
if (ret)
goto err;
}
- bch2_trans_unlock(trans);
- down_write(&c->snapshot_create_lock);
-
- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k, ({
- u32 snapshot = k.k->p.offset;
- u32 equiv = bch2_snapshot_equiv(c, snapshot);
-
- equiv != snapshot
- ? snapshot_list_add(c, &deleted_interior, snapshot)
- : 0;
- }));
-
- bch_err_msg(c, ret, "walking snapshots");
+ ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
+ ? delete_dead_snapshot_keys_v2(trans)
+ : delete_dead_snapshot_keys_v1(trans);
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting keys from dying snapshots");
if (ret)
- goto err_create_lock;
+ goto err;
+
+ darray_for_each(d->delete_leaves, i) {
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, *i));
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting snapshot %u", *i);
+ if (ret)
+ goto err;
+ }
/*
* Fixing children of deleted snapshots can't be done completely
@@ -1643,50 +1848,81 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
* nodes some depth fields will be off:
*/
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
- BTREE_ITER_INTENT, k,
+ BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
if (ret)
- goto err_create_lock;
-
- darray_for_each(deleted, i) {
- ret = commit_do(trans, NULL, NULL, 0,
- bch2_snapshot_node_delete(trans, *i));
- bch_err_msg(c, ret, "deleting snapshot %u", *i);
- if (ret)
- goto err_create_lock;
- }
+ goto err;
- darray_for_each(deleted_interior, i) {
+ darray_for_each(d->delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
- bch2_snapshot_node_delete(trans, *i));
- bch_err_msg(c, ret, "deleting snapshot %u", *i);
+ bch2_snapshot_node_delete(trans, i->id));
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting snapshot %u", i->id);
if (ret)
- goto err_create_lock;
+ goto err;
}
-err_create_lock:
- up_write(&c->snapshot_create_lock);
err:
- darray_exit(&deleted_interior);
- darray_exit(&deleted);
+ mutex_lock(&d->progress_lock);
+ darray_exit(&d->deleting_from_trees);
+ darray_exit(&d->delete_interior);
+ darray_exit(&d->delete_leaves);
+ d->running = false;
+ mutex_unlock(&d->progress_lock);
bch2_trans_put(trans);
- bch_err_fn(c, ret);
+out_unlock:
+ mutex_unlock(&d->lock);
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_fn(c, ret);
return ret;
}
+int bch2_delete_dead_snapshots(struct bch_fs *c)
+{
+ if (!c->opts.auto_snapshot_deletion)
+ return 0;
+
+ return __bch2_delete_dead_snapshots(c);
+}
+
void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+ struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);
+
+ set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
bch2_delete_dead_snapshots(c);
- bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
- !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+ if (!c->opts.auto_snapshot_deletion)
+ return;
+
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots))
+ return;
+
+ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ if (!queue_work(c->write_ref_wq, &c->snapshot_delete.work))
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
+}
+
+void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ if (!d->running) {
+ prt_str(out, "(not running)");
+ return;
+ }
+
+ mutex_lock(&d->progress_lock);
+ bch2_snapshot_delete_nodes_to_text(out, d);
+
+ bch2_bbpos_to_text(out, d->pos);
+ mutex_unlock(&d->progress_lock);
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
@@ -1698,18 +1934,10 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
struct bkey_s_c k;
int ret;
- bch2_trans_iter_init(trans, &iter, id, pos,
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_ALL_SNAPSHOTS);
- while (1) {
- k = bch2_btree_iter_prev(&iter);
- ret = bkey_err(k);
- if (ret)
- break;
-
- if (!k.k)
- break;
-
+ for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos),
+ BTREE_ITER_not_extents|
+ BTREE_ITER_all_snapshots,
+ k, ret) {
if (!bkey_eq(pos, k.k->p))
break;
@@ -1723,133 +1951,36 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return ret;
}
-static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
-{
- const struct snapshot_t *s = snapshot_t(c, id);
-
- return s->children[1] ?: s->children[0];
-}
-
-static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
-{
- u32 child;
-
- while ((child = bch2_snapshot_smallest_child(c, id)))
- id = child;
- return id;
-}
-
-static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
- enum btree_id btree,
- struct bkey_s_c interior_k,
- u32 leaf_id, struct bpos *new_min_pos)
-{
- struct btree_iter iter;
- struct bpos pos = interior_k.k->p;
- struct bkey_s_c k;
- struct bkey_i *new;
- int ret;
-
- pos.snapshot = leaf_id;
-
- bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
- if (ret)
- goto out;
-
- /* key already overwritten in this snapshot? */
- if (k.k->p.snapshot != interior_k.k->p.snapshot)
- goto out;
-
- if (bpos_eq(*new_min_pos, POS_MIN)) {
- *new_min_pos = k.k->p;
- new_min_pos->snapshot = leaf_id;
- }
-
- new = bch2_bkey_make_mut_noupdate(trans, interior_k);
- ret = PTR_ERR_OR_ZERO(new);
- if (ret)
- goto out;
-
- new->k.p.snapshot = leaf_id;
- ret = bch2_trans_update(trans, &iter, new, 0);
-out:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
- enum btree_id btree,
- struct bkey_s_c k,
- struct bpos *new_min_pos)
+static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
- struct bch_fs *c = trans->c;
- struct bkey_buf sk;
- u32 restart_count = trans->restart_count;
- int ret = 0;
-
- bch2_bkey_buf_init(&sk);
- bch2_bkey_buf_reassemble(&sk, c, k);
- k = bkey_i_to_s_c(sk.k);
-
- *new_min_pos = POS_MIN;
-
- for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
- id < k.k->p.snapshot;
- id++) {
- if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
- !bch2_snapshot_is_leaf(c, id))
- continue;
-again:
- ret = btree_trans_too_many_iters(trans) ?:
- bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?:
- bch2_trans_commit(trans, NULL, NULL, 0);
- if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
- bch2_trans_begin(trans);
- goto again;
- }
-
- if (ret)
- break;
- }
-
- bch2_bkey_buf_exit(&sk, c);
-
- return ret ?: trans_was_restarted(trans, restart_count);
+ /* If there's one child, it's redundant and keys will be moved to the child */
+ return !!snap.v->children[0] + !!snap.v->children[1] == 1;
}
static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
- struct bch_fs *c = trans->c;
- struct bkey_s_c_snapshot snap;
- int ret = 0;
-
if (k.k->type != KEY_TYPE_snapshot)
return 0;
- snap = bkey_s_c_to_snapshot(k);
- if (BCH_SNAPSHOT_DELETED(snap.v) ||
- bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
- (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
- set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
- return 0;
- }
+ struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
+ if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
+ interior_snapshot_needs_delete(snap))
+ set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
- return ret;
+ return 0;
}
int bch2_snapshots_read(struct bch_fs *c)
{
+ /*
+ * Initializing the is_ancestor bitmaps requires ancestors to already be
+ * initialized - so mark in reverse:
+ */
int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k,
+ for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
+ POS_MAX, 0, k,
__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
- bch2_snapshot_set_equiv(trans, k) ?:
- bch2_check_snapshot_needs_deletion(trans, k)) ?:
- for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k,
- (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
+ bch2_check_snapshot_needs_deletion(trans, k)));
bch_err_fn(c, ret);
/*
@@ -1861,10 +1992,6 @@ int bch2_snapshots_read(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
- if (bch2_err_matches(ret, EIO) ||
- (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
- ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
-
return ret;
}
@@ -1872,3 +1999,11 @@ void bch2_fs_snapshots_exit(struct bch_fs *c)
{
kvfree(rcu_dereference_protected(c->snapshots, true));
}
+
+void bch2_fs_snapshots_init_early(struct bch_fs *c)
+{
+ INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
+ mutex_init(&c->snapshot_delete.lock);
+ mutex_init(&c->snapshot_delete.progress_lock);
+ mutex_init(&c->snapshots_unlinked_lock);
+}