summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_update_interior.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_update_interior.c')
-rw-r--r--fs/bcachefs/btree_update_interior.c790
1 files changed, 458 insertions, 332 deletions
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 6030c396754f..74e65714fecd 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -14,8 +14,10 @@
#include "btree_locking.h"
#include "buckets.h"
#include "clock.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
+#include "io_write.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
@@ -34,26 +36,12 @@ static const char * const bch2_btree_update_modes[] = {
NULL
};
+static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *);
+
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
-static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
- enum btree_id btree_id,
- unsigned level,
- struct bpos pos)
-{
- btree_path_idx_t path_idx = bch2_path_get(trans, btree_id, pos, level + 1, level,
- BTREE_ITER_NOPRESERVE|
- BTREE_ITER_INTENT, _RET_IP_);
- path_idx = bch2_btree_path_make_mut(trans, path_idx, true, _RET_IP_);
-
- struct btree_path *path = trans->paths + path_idx;
- bch2_btree_path_downgrade(trans, path);
- __bch2_btree_path_unlock(trans, path);
- return path_idx;
-}
-
/*
* Verify that child nodes correctly span parent node's range:
*/
@@ -69,17 +57,38 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
struct bkey_buf prev;
int ret = 0;
+ printbuf_indent_add_nextline(&buf, 2);
+
BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
!bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
b->data->min_key));
- if (!b->c.level)
- return 0;
-
bch2_bkey_buf_init(&prev);
bkey_init(&prev.k->k);
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+ if (b == btree_node_root(c, b)) {
+ if (!bpos_eq(b->data->min_key, POS_MIN)) {
+ ret = __bch2_topology_error(c, &buf);
+
+ bch2_bpos_to_text(&buf, b->data->min_key);
+ log_fsck_err(trans, btree_root_bad_min_key,
+ "btree root with incorrect min_key: %s", buf.buf);
+ goto out;
+ }
+
+ if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
+ ret = __bch2_topology_error(c, &buf);
+ bch2_bpos_to_text(&buf, b->data->max_key);
+ log_fsck_err(trans, btree_root_bad_max_key,
+ "btree root with incorrect max_key: %s", buf.buf);
+ goto out;
+ }
+ }
+
+ if (!b->c.level)
+ goto out;
+
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
goto out;
@@ -91,20 +100,19 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
: bpos_successor(prev.k->k.p);
if (!bpos_eq(expected_min, bp.v->min_key)) {
- bch2_topology_error(c);
+ ret = __bch2_topology_error(c, &buf);
- printbuf_reset(&buf);
- prt_str(&buf, "end of prev node doesn't match start of next node\n"),
- prt_printf(&buf, " in btree %s level %u node ",
- bch2_btree_id_str(b->c.btree_id), b->c.level);
+ prt_str(&buf, "end of prev node doesn't match start of next node\nin ");
+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
+ prt_str(&buf, " node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- prt_str(&buf, "\n prev ");
+ prt_str(&buf, "\nprev ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
- prt_str(&buf, "\n next ");
+ prt_str(&buf, "\nnext ");
bch2_bkey_val_to_text(&buf, c, k);
- need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf);
- goto topology_repair;
+ log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf);
+ goto out;
}
bch2_bkey_buf_reassemble(&prev, c, k);
@@ -112,29 +120,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
}
if (bkey_deleted(&prev.k->k)) {
- bch2_topology_error(c);
+ ret = __bch2_topology_error(c, &buf);
- printbuf_reset(&buf);
- prt_str(&buf, "empty interior node\n");
- prt_printf(&buf, " in btree %s level %u node ",
- bch2_btree_id_str(b->c.btree_id), b->c.level);
+ prt_str(&buf, "empty interior node\nin ");
+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
+ prt_str(&buf, " node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf);
- goto topology_repair;
+ log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf);
} else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
- bch2_topology_error(c);
+ ret = __bch2_topology_error(c, &buf);
- printbuf_reset(&buf);
- prt_str(&buf, "last child node doesn't end at end of parent node\n");
- prt_printf(&buf, " in btree %s level %u node ",
- bch2_btree_id_str(b->c.btree_id), b->c.level);
+ prt_str(&buf, "last child node doesn't end at end of parent node\nin ");
+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
+ prt_str(&buf, " node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- prt_str(&buf, "\n last key ");
+ prt_str(&buf, "\nlast key ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
- need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf);
- goto topology_repair;
+ log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf);
}
out:
fsck_err:
@@ -142,15 +146,6 @@ fsck_err:
bch2_bkey_buf_exit(&prev, c);
printbuf_exit(&buf);
return ret;
-topology_repair:
- if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
- bch2_inconsistent_error(c);
- ret = -BCH_ERR_btree_need_topology_repair;
- } else {
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- }
- goto out;
}
/* Calculate ideal packed bkey format for new btree nodes: */
@@ -158,7 +153,6 @@ topology_repair:
static void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
{
struct bkey_packed *k;
- struct bset_tree *t;
struct bkey uk;
for_each_bset(b, t)
@@ -235,10 +229,6 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b)
BUG_ON(b->will_make_reachable);
clear_btree_node_noevict(b);
-
- mutex_lock(&c->btree_cache.lock);
- list_move(&b->list, &c->btree_cache.freeable);
- mutex_unlock(&c->btree_cache.lock);
}
static void bch2_btree_node_free_inmem(struct btree_trans *trans,
@@ -246,19 +236,19 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
struct btree *b)
{
struct bch_fs *c = trans->c;
- unsigned i, level = b->c.level;
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+
__btree_node_free(trans, b);
+
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_unlock(&c->btree_cache.lock);
+
six_unlock_write(&b->c.lock);
- mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
- trans_for_each_path(trans, path, i)
- if (path->l[level].b == b) {
- btree_node_unlock(trans, path, level);
- path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
- }
+ bch2_trans_node_drop(trans, b);
}
static void bch2_btree_node_free_never_used(struct btree_update *as,
@@ -267,8 +257,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
{
struct bch_fs *c = as->c;
struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
- struct btree_path *path;
- unsigned i, level = b->c.level;
BUG_ON(!list_empty(&b->write_blocked));
BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
@@ -282,8 +270,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
clear_btree_node_need_write(b);
mutex_lock(&c->btree_cache.lock);
- list_del_init(&b->list);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_unlock(&c->btree_cache.lock);
BUG_ON(p->nr >= ARRAY_SIZE(p->b));
@@ -291,17 +278,14 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
six_unlock_intent(&b->c.lock);
- trans_for_each_path(trans, path, i)
- if (path->l[level].b == b) {
- btree_node_unlock(trans, path, level);
- path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
- }
+ bch2_trans_node_drop(trans, b);
}
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
bool interior_node,
+ unsigned target,
unsigned flags)
{
struct bch_fs *c = trans->c;
@@ -316,6 +300,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
: 0;
int ret;
+ b = bch2_btree_node_mem_alloc(trans, interior_node);
+ if (IS_ERR(b))
+ return b;
+
+ BUG_ON(b->ob.nr);
+
mutex_lock(&c->btree_reserve_cache_lock);
if (c->btree_reserve_cache_nr > nr_reserve) {
struct btree_alloc *a =
@@ -324,12 +314,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
obs = a->ob;
bkey_copy(&tmp.k, &a->k);
mutex_unlock(&c->btree_reserve_cache_lock);
- goto mem_alloc;
+ goto out;
}
mutex_unlock(&c->btree_reserve_cache_lock);
-
retry:
ret = bch2_alloc_sectors_start_trans(trans,
+ target ?:
c->opts.metadata_target ?:
c->opts.foreground_target,
0,
@@ -338,9 +328,11 @@ retry:
res->nr_replicas,
min(res->nr_replicas,
c->opts.metadata_replicas_required),
- watermark, 0, cl, &wp);
+ watermark,
+ target ? BCH_WRITE_only_specified_devs : 0,
+ cl, &wp);
if (unlikely(ret))
- return ERR_PTR(ret);
+ goto err;
if (wp->sectors_free < btree_sectors(c)) {
struct open_bucket *ob;
@@ -359,19 +351,16 @@ retry:
bch2_open_bucket_get(c, wp, &obs);
bch2_alloc_sectors_done(c, wp);
-mem_alloc:
- b = bch2_btree_node_mem_alloc(trans, interior_node);
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
-
- /* we hold cannibalize_lock: */
- BUG_ON(IS_ERR(b));
- BUG_ON(b->ob.nr);
-
+out:
bkey_copy(&b->key, &tmp.k);
b->ob = obs;
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
return b;
+err:
+ bch2_btree_node_to_freelist(c, b);
+ return ERR_PTR(ret);
}
static struct btree *bch2_btree_node_alloc(struct btree_update *as,
@@ -513,8 +502,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
__btree_node_free(trans, b);
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
+ bch2_btree_node_to_freelist(c, b);
}
}
}
@@ -522,6 +510,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
static int bch2_btree_reserve_get(struct btree_trans *trans,
struct btree_update *as,
unsigned nr_nodes[2],
+ unsigned target,
unsigned flags,
struct closure *cl)
{
@@ -544,7 +533,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
while (p->nr < nr_nodes[interior]) {
b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
- interior, flags);
+ interior, target, flags);
if (IS_ERR(b)) {
ret = PTR_ERR(b);
goto err;
@@ -646,7 +635,7 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k),
- BTREE_TRIGGER_TRANSACTIONAL);
+ BTREE_TRIGGER_transactional);
if (ret)
return ret;
}
@@ -655,7 +644,7 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k),
- BTREE_TRIGGER_TRANSACTIONAL);
+ BTREE_TRIGGER_transactional);
if (ret)
return ret;
}
@@ -663,6 +652,14 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
return 0;
}
+/* If the node has been reused, we might be reading uninitialized memory - that's fine: */
+static noinline __no_kmsan_checks bool btree_node_seq_matches(struct btree *b, __le64 seq)
+{
+ struct btree_node *b_data = READ_ONCE(b->data);
+
+ return (b_data ? b_data->keys.seq : 0) == seq;
+}
+
static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
@@ -691,15 +688,9 @@ static void btree_update_nodes_written(struct btree_update *as)
* on disk:
*/
for (i = 0; i < as->nr_old_nodes; i++) {
- __le64 seq;
-
b = as->old_nodes[i];
- btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
- seq = b->data ? b->data->keys.seq : 0;
- six_unlock_read(&b->c.lock);
-
- if (seq == as->old_nodes_seq[i])
+ if (btree_node_seq_matches(b, as->old_nodes_seq[i]))
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
TASK_UNINTERRUPTIBLE);
}
@@ -729,15 +720,24 @@ static void btree_update_nodes_written(struct btree_update *as)
"%s", bch2_err_str(ret));
err:
/*
+ * Ensure transaction is unlocked before using btree_node_lock_nopath()
+ * (the use of which is always suspect, we need to work on removing this
+ * in the future)
+ *
+ * It should be, but bch2_path_get_unlocked_mut() -> bch2_path_get()
+ * calls bch2_path_upgrade(), before we call path_make_mut(), so we may
+ * rarely end up with a locked path besides the one we have here:
+ */
+ bch2_trans_unlock(trans);
+ bch2_trans_begin(trans);
+
+ /*
* We have to be careful because another thread might be getting ready
* to free as->b and calling btree_update_reparent() on us - we'll
* recheck under btree_update_lock below:
*/
b = READ_ONCE(as->b);
if (b) {
- btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
- as->btree_id, b->c.level, b->key.k.p);
- struct btree_path *path = trans->paths + path_idx;
/*
* @b is the node we did the final insert into:
*
@@ -750,17 +750,9 @@ err:
* we're in journal error state:
*/
- /*
- * Ensure transaction is unlocked before using
- * btree_node_lock_nopath() (the use of which is always suspect,
- * we need to work on removing this in the future)
- *
- * It should be, but get_unlocked_mut_path() -> bch2_path_get()
- * calls bch2_path_upgrade(), before we call path_make_mut(), so
- * we may rarely end up with a locked path besides the one we
- * have here:
- */
- bch2_trans_unlock(trans);
+ btree_path_idx_t path_idx = bch2_path_get_unlocked_mut(trans,
+ as->btree_id, b->c.level, b->key.k.p);
+ struct btree_path *path = trans->paths + path_idx;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED);
path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
@@ -805,7 +797,7 @@ err:
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
six_unlock_write(&b->c.lock);
- btree_node_write_if_need(c, b, SIX_LOCK_intent);
+ btree_node_write_if_need(trans, b, SIX_LOCK_intent);
btree_node_unlock(trans, path, b->c.level);
bch2_path_put(trans, path_idx, true);
}
@@ -826,7 +818,7 @@ err:
b = as->new_nodes[i];
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
- btree_node_write_if_need(c, b, SIX_LOCK_read);
+ btree_node_write_if_need(trans, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
@@ -1130,7 +1122,8 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level_start, bool split, unsigned flags)
+ unsigned level_start, bool split,
+ unsigned target, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
@@ -1154,13 +1147,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags |= watermark;
if (watermark < BCH_WATERMARK_reclaim &&
- test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+ test_bit(JOURNAL_space_low, &c->journal.flags)) {
if (flags & BCH_TRANS_COMMIT_journal_reclaim)
return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
- bch2_trans_unlock(trans);
- wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
- ret = bch2_trans_relock(trans);
+ ret = drop_locks_do(trans,
+ ({ wait_event(c->journal.wait, !test_bit(JOURNAL_space_low, &c->journal.flags)); 0; }));
if (ret)
return ERR_PTR(ret);
}
@@ -1206,7 +1198,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as->start_time = start_time;
as->ip_started = _RET_IP_;
as->mode = BTREE_UPDATE_none;
- as->watermark = watermark;
+ as->flags = flags;
as->took_gc_lock = true;
as->btree_id = path->btree_id;
as->update_level_start = level_start;
@@ -1236,12 +1228,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
ret = bch2_disk_reservation_get(c, &as->disk_res,
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
- c->opts.metadata_replicas,
+ READ_ONCE(c->opts.metadata_replicas),
disk_res_flags);
if (ret)
goto err;
- ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, NULL);
if (bch2_err_matches(ret, ENOSPC) ||
bch2_err_matches(ret, ENOMEM)) {
struct closure cl;
@@ -1260,10 +1252,10 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
closure_init_stack(&cl);
do {
- ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl);
bch2_trans_unlock(trans);
- closure_sync(&cl);
+ bch2_wait_on_allocator(c, &cl);
} while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
}
@@ -1283,7 +1275,8 @@ err:
bch2_btree_update_free(as, trans);
if (!bch2_err_matches(ret, ENOSPC) &&
!bch2_err_matches(ret, EROFS) &&
- ret != -BCH_ERR_journal_reclaim_would_deadlock)
+ ret != -BCH_ERR_journal_reclaim_would_deadlock &&
+ ret != -BCH_ERR_journal_shutdown)
bch_err_fn_ratelimited(c, ret);
return ERR_PTR(ret);
}
@@ -1355,26 +1348,23 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
struct bch_fs *c = as->c;
struct bkey_packed *k;
struct printbuf buf = PRINTBUF;
- unsigned long old, new, v;
+ unsigned long old, new;
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
- !btree_ptr_sectors_written(insert));
+ !btree_ptr_sectors_written(bkey_i_to_s_c(insert)));
- if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+ if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)))
bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
- if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
- btree_node_type(b), WRITE, &buf) ?:
- bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) {
- printbuf_reset(&buf);
- prt_printf(&buf, "inserting invalid bkey\n ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
- prt_printf(&buf, "\n ");
- bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
- btree_node_type(b), WRITE, &buf);
- bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf);
-
- bch2_fs_inconsistent(c, "%s", buf.buf);
+ struct bkey_validate_context from = (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = b->c.level,
+ .btree = b->c.btree_id,
+ .flags = BCH_VALIDATE_commit,
+ };
+ if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), from) ?:
+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), from)) {
+ bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__);
dump_stack();
}
@@ -1394,19 +1384,19 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
set_btree_node_dirty_acct(c, b);
- v = READ_ONCE(b->flags);
+ old = READ_ONCE(b->flags);
do {
- old = new = v;
+ new = old;
new &= ~BTREE_WRITE_TYPE_MASK;
new |= BTREE_WRITE_interior;
new |= 1 << BTREE_NODE_need_write;
- } while ((v = cmpxchg(&b->flags, old, new)) != old);
+ } while (!try_cmpxchg(&b->flags, &old, new));
printbuf_exit(&buf);
}
-static void
+static int
bch2_btree_insert_keys_interior(struct btree_update *as,
struct btree_trans *trans,
struct btree_path *path,
@@ -1423,15 +1413,40 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
(bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
;
- while (!bch2_keylist_empty(keys)) {
- insert = bch2_keylist_front(keys);
+ for (;
+ insert != keys->top && bpos_le(insert->k.p, b->key.k.p);
+ insert = bkey_next(insert))
+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
- if (bpos_gt(insert->k.p, b->key.k.p))
- break;
+ int ret = bch2_btree_node_check_topology(trans, b);
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
- bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
- bch2_keylist_pop_front(keys);
+ for (struct bkey_i *k = keys->keys;
+ k != insert;
+ k = bkey_next(k)) {
+ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k));
+ prt_newline(&buf);
+ }
+
+ bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s",
+ (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf);
+ dump_stack();
+ return ret;
}
+
+ memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data);
+ keys->top_p -= insert->_data - keys->keys_p;
+ return 0;
+}
+
+static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos)
+{
+ if (insert_keys)
+ for_each_keylist_key(insert_keys, k)
+ if (bkey_deleted(&k->k) && bpos_eq(k->k.p, pos))
+ return true;
+ return false;
}
/*
@@ -1441,7 +1456,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
static void __btree_split_node(struct btree_update *as,
struct btree_trans *trans,
struct btree *b,
- struct btree *n[2])
+ struct btree *n[2],
+ struct keylist *insert_keys)
{
struct bkey_packed *k;
struct bpos n1_pos = POS_MIN;
@@ -1476,7 +1492,8 @@ static void __btree_split_node(struct btree_update *as,
if (b->c.level &&
u64s < n1_u64s &&
u64s + k->u64s >= n1_u64s &&
- bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
+ (bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p) ||
+ key_deleted_in_insert(insert_keys, uk.p)))
n1_u64s += k->u64s;
i = u64s >= n1_u64s;
@@ -1554,11 +1571,11 @@ static void __btree_split_node(struct btree_update *as,
* nodes that were coalesced, and thus in the middle of a child node post
* coalescing:
*/
-static void btree_split_insert_keys(struct btree_update *as,
- struct btree_trans *trans,
- btree_path_idx_t path_idx,
- struct btree *b,
- struct keylist *keys)
+static int btree_split_insert_keys(struct btree_update *as,
+ struct btree_trans *trans,
+ btree_path_idx_t path_idx,
+ struct btree *b,
+ struct keylist *keys)
{
struct btree_path *path = trans->paths + path_idx;
@@ -1568,10 +1585,12 @@ static void btree_split_insert_keys(struct btree_update *as,
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
- bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
-
- BUG_ON(bch2_btree_node_check_topology(trans, b));
+ int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ if (ret)
+ return ret;
}
+
+ return 0;
}
static int btree_split(struct btree_update *as, struct btree_trans *trans,
@@ -1593,8 +1612,6 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (ret)
return ret;
- bch2_btree_interior_update_will_free_node(as, b);
-
if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
struct btree *n[2];
@@ -1603,11 +1620,13 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
- __btree_split_node(as, trans, b, n);
+ __btree_split_node(as, trans, b, n, keys);
if (keys) {
- btree_split_insert_keys(as, trans, path, n1, keys);
- btree_split_insert_keys(as, trans, path, n2, keys);
+ ret = btree_split_insert_keys(as, trans, path, n1, keys) ?:
+ btree_split_insert_keys(as, trans, path, n2, keys);
+ if (ret)
+ goto err;
BUG_ON(!bch2_keylist_empty(keys));
}
@@ -1619,12 +1638,12 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock);
- path1 = get_unlocked_mut_path(trans, as->btree_id, n1->c.level, n1->key.k.p);
+ path1 = bch2_path_get_unlocked_mut(trans, as->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, trans->paths + path1, n1);
- path2 = get_unlocked_mut_path(trans, as->btree_id, n2->c.level, n2->key.k.p);
+ path2 = bch2_path_get_unlocked_mut(trans, as->btree_id, n2->c.level, n2->key.k.p);
six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, trans->paths + path2, n2->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, trans->paths + path2, n2);
@@ -1653,7 +1672,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
- btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+ ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+ if (ret)
+ goto err;
}
} else {
trace_and_count(c, btree_node_compact, trans, b);
@@ -1661,7 +1682,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
n1 = bch2_btree_node_alloc_replacement(as, trans, b);
if (keys) {
- btree_split_insert_keys(as, trans, path, n1, keys);
+ ret = btree_split_insert_keys(as, trans, path, n1, keys);
+ if (ret)
+ goto err;
BUG_ON(!bch2_keylist_empty(keys));
}
@@ -1669,7 +1692,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_update_add_new_node(as, n1);
six_unlock_write(&n1->c.lock);
- path1 = get_unlocked_mut_path(trans, as->btree_id, n1->c.level, n1->key.k.p);
+ path1 = bch2_path_get_unlocked_mut(trans, as->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, trans->paths + path1, n1);
@@ -1693,16 +1716,18 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (ret)
goto err;
+ bch2_btree_interior_update_will_free_node(as, b);
+
if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
- bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+ bch2_btree_node_write_trans(trans, n3, SIX_LOCK_intent, 0);
}
if (n2) {
bch2_btree_update_get_open_buckets(as, n2);
- bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+ bch2_btree_node_write_trans(trans, n2, SIX_LOCK_intent, 0);
}
bch2_btree_update_get_open_buckets(as, n1);
- bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
+ bch2_btree_node_write_trans(trans, n1, SIX_LOCK_intent, 0);
/*
* The old node must be freed (in memory) _before_ unlocking the new
@@ -1777,11 +1802,24 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
int ret;
lockdep_assert_held(&c->gc_lock);
- BUG_ON(!btree_node_intent_locked(path, b->c.level));
BUG_ON(!b->c.level);
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
+ if (!btree_node_intent_locked(path, b->c.level)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "%s(): node not locked at level %u\n",
+ __func__, b->c.level);
+ bch2_btree_update_to_text(&buf, as);
+ bch2_btree_path_to_text(&buf, trans, path_idx);
+ bch2_fs_emergency_read_only2(c, &buf);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ return -EIO;
+ }
+
ret = bch2_btree_node_lock_write(trans, path, &b->c);
if (ret)
return ret;
@@ -1793,15 +1831,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
goto split;
}
- ret = bch2_btree_node_check_topology(trans, b);
+
+ ret = bch2_btree_node_check_topology(trans, b) ?:
+ bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
if (ret) {
bch2_btree_node_unlock_write(trans, path, b);
return ret;
}
- bch2_btree_insert_keys_interior(as, trans, path, b,
- path->l[b->c.level].iter, keys);
-
trans_for_each_path_with_node(trans, b, linked, i)
bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
@@ -1821,8 +1859,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
btree_update_updated_node(as, b);
bch2_btree_node_unlock_write(trans, path, b);
-
- BUG_ON(bch2_btree_node_check_topology(trans, b));
return 0;
split:
/*
@@ -1849,7 +1885,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
as = bch2_btree_update_start(trans, trans->paths + path,
trans->paths[path].level,
- true, flags);
+ true, 0, flags);
if (IS_ERR(as))
return PTR_ERR(as);
@@ -1899,12 +1935,12 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
BUG_ON(ret);
bch2_btree_update_get_open_buckets(as, n);
- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_trans_node_add(trans, path, n);
six_unlock_intent(&n->c.lock);
mutex_lock(&c->btree_cache.lock);
- list_add_tail(&b->list, &c->btree_cache.live);
+ list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list);
mutex_unlock(&c->btree_cache.lock);
bch2_trans_verify_locks(trans);
@@ -1919,7 +1955,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
return bch2_btree_split_leaf(trans, path, flags);
struct btree_update *as =
- bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
+ bch2_btree_update_start(trans, trans->paths + path, b->c.level,
+ true, 0, flags);
if (IS_ERR(as))
return PTR_ERR(as);
@@ -1947,6 +1984,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
u64 start_time = local_clock();
int ret = 0;
+ bch2_trans_verify_not_unlocked_or_in_restart(trans);
BUG_ON(!trans->paths[path].should_be_locked);
BUG_ON(!btree_node_locked(&trans->paths[path], level));
@@ -1960,7 +1998,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
return 0;
- flags &= ~BCH_WATERMARK_MASK;
+ if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) {
+ flags &= ~BCH_WATERMARK_MASK;
+ flags |= BCH_WATERMARK_btree;
+ flags |= BCH_TRANS_COMMIT_journal_reclaim;
+ }
b = trans->paths[path].l[level].b;
@@ -1975,12 +2017,12 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
: bpos_successor(b->data->max_key);
sib_path = bch2_path_get(trans, btree, sib_pos,
- U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_);
+ U8_MAX, level, BTREE_ITER_intent, _THIS_IP_);
ret = bch2_btree_path_traverse(trans, sib_path, false);
if (ret)
goto err;
- btree_path_set_should_be_locked(trans->paths + sib_path);
+ btree_path_set_should_be_locked(trans, trans->paths + sib_path);
m = trans->paths[sib_path].l[level].b;
@@ -1999,18 +2041,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
}
if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-
- bch2_bpos_to_text(&buf1, prev->data->max_key);
- bch2_bpos_to_text(&buf2, next->data->min_key);
- bch_err(c,
- "%s(): btree topology error:\n"
- " prev ends at %s\n"
- " next starts at %s",
- __func__, buf1.buf, buf2.buf);
- printbuf_exit(&buf1);
- printbuf_exit(&buf2);
- ret = bch2_topology_error(c);
+ struct printbuf buf = PRINTBUF;
+
+ printbuf_indent_add_nextline(&buf, 2);
+ prt_printf(&buf, "%s(): ", __func__);
+ ret = __bch2_topology_error(c, &buf);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "prev ends at ");
+ bch2_bpos_to_text(&buf, prev->data->max_key);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "next starts at ");
+ bch2_bpos_to_text(&buf, next->data->min_key);
+
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
goto err;
}
@@ -2039,16 +2085,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
parent = btree_node_parent(trans->paths + path, b);
as = bch2_btree_update_start(trans, trans->paths + path, level, false,
- BCH_TRANS_COMMIT_no_enospc|flags);
+ 0, BCH_TRANS_COMMIT_no_enospc|flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto err;
trace_and_count(c, btree_node_merge, trans, b);
- bch2_btree_interior_update_will_free_node(as, b);
- bch2_btree_interior_update_will_free_node(as, m);
-
n = bch2_btree_node_alloc(as, trans, b->c.level);
SET_BTREE_NODE_SEQ(n->data,
@@ -2068,7 +2111,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_update_add_new_node(as, n);
six_unlock_write(&n->c.lock);
- new_path = get_unlocked_mut_path(trans, btree, n->c.level, n->key.k.p);
+ new_path = bch2_path_get_unlocked_mut(trans, btree, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, trans->paths + new_path, n);
@@ -2084,10 +2127,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
if (ret)
goto err_free_update;
+ bch2_btree_interior_update_will_free_node(as, b);
+ bch2_btree_interior_update_will_free_node(as, m);
+
bch2_trans_verify_paths(trans);
bch2_btree_update_get_open_buckets(as, n);
- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_btree_node_free_inmem(trans, trans->paths + path, b);
bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m);
@@ -2118,9 +2164,35 @@ err_free_update:
goto out;
}
+static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree *b)
+{
+ bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p,
+ BTREE_MAX_DEPTH, b->c.level,
+ BTREE_ITER_intent);
+ int ret = bch2_btree_iter_traverse(trans, iter);
+ if (ret)
+ goto err;
+
+ /* has node been freed? */
+ if (btree_iter_path(trans, iter)->l[b->c.level].b != b) {
+ /* node has been freed: */
+ BUG_ON(!btree_node_dying(b));
+ ret = -BCH_ERR_btree_node_dying;
+ goto err;
+ }
+
+ BUG_ON(!btree_node_hashed(b));
+ return 0;
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+}
+
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
+ unsigned target,
unsigned flags)
{
struct bch_fs *c = trans->c;
@@ -2133,20 +2205,19 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_path *path = btree_iter_path(trans, iter);
parent = btree_node_parent(path, b);
- as = bch2_btree_update_start(trans, path, b->c.level, false, flags);
+ as = bch2_btree_update_start(trans, path, b->c.level,
+ false, target, flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto out;
- bch2_btree_interior_update_will_free_node(as, b);
-
n = bch2_btree_node_alloc_replacement(as, trans, b);
bch2_btree_build_aux_trees(n);
bch2_btree_update_add_new_node(as, n);
six_unlock_write(&n->c.lock);
- new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
+ new_path = bch2_path_get_unlocked_mut(trans, iter->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, trans->paths + new_path, n);
@@ -2163,8 +2234,10 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
if (ret)
goto err;
+ bch2_btree_interior_update_will_free_node(as, b);
+
bch2_btree_update_get_open_buckets(as, n);
- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b);
@@ -2183,135 +2256,173 @@ err:
goto out;
}
-struct async_btree_rewrite {
- struct bch_fs *c;
- struct work_struct work;
- struct list_head list;
- enum btree_id btree_id;
- unsigned level;
- struct bpos pos;
- __le64 seq;
-};
-
-static int async_btree_node_rewrite_trans(struct btree_trans *trans,
- struct async_btree_rewrite *a)
+static int bch2_btree_node_rewrite_key(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_i *k, unsigned flags)
{
- struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct btree *b;
- int ret;
-
- bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
- BTREE_MAX_DEPTH, a->level, 0);
- b = bch2_btree_iter_peek_node(&iter);
- ret = PTR_ERR_OR_ZERO(b);
+ bch2_trans_node_iter_init(trans, &iter,
+ btree, k->k.p,
+ BTREE_MAX_DEPTH, level, 0);
+ struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
+ int ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto out;
- if (!b || b->data->keys.seq != a->seq) {
- struct printbuf buf = PRINTBUF;
+ bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k);
+ ret = found
+ ? bch2_btree_node_rewrite(trans, &iter, b, 0, flags)
+ : -ENOENT;
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
- if (b)
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- else
- prt_str(&buf, "(null");
- bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s",
- __func__, a->seq, buf.buf);
- printbuf_exit(&buf);
- goto out;
- }
+int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bpos pos,
+ unsigned target,
+ unsigned flags)
+{
+ BUG_ON(!level);
- ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-out:
+ /* Traverse one depth lower to get a pointer to the node itself: */
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0);
+ struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, target, flags);
+err:
bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans,
+ struct btree *b, unsigned flags)
+{
+ struct btree_iter iter;
+ int ret = get_iter_to_node(trans, &iter, b);
+ if (ret)
+ return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0, flags);
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
+struct async_btree_rewrite {
+ struct bch_fs *c;
+ struct work_struct work;
+ struct list_head list;
+ enum btree_id btree_id;
+ unsigned level;
+ struct bkey_buf key;
+};
+
static void async_btree_node_rewrite_work(struct work_struct *work)
{
struct async_btree_rewrite *a =
container_of(work, struct async_btree_rewrite, work);
struct bch_fs *c = a->c;
- int ret;
- ret = bch2_trans_do(c, NULL, NULL, 0,
- async_btree_node_rewrite_trans(trans, a));
- bch_err_fn_ratelimited(c, ret);
- bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
+ int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans,
+ a->btree_id, a->level, a->key.k, 0));
+ if (ret != -ENOENT &&
+ !bch2_err_matches(ret, EROFS) &&
+ ret != -BCH_ERR_journal_shutdown)
+ bch_err_fn_ratelimited(c, ret);
+
+ spin_lock(&c->btree_node_rewrites_lock);
+ list_del(&a->list);
+ spin_unlock(&c->btree_node_rewrites_lock);
+
+ closure_wake_up(&c->btree_node_rewrites_wait);
+
+ bch2_bkey_buf_exit(&a->key, c);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
{
- struct async_btree_rewrite *a;
- int ret;
-
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (!a) {
- bch_err(c, "%s: error allocating memory", __func__);
+ struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS);
+ if (!a)
return;
- }
a->c = c;
a->btree_id = b->c.btree_id;
a->level = b->c.level;
- a->pos = b->key.k.p;
- a->seq = b->data->keys.seq;
INIT_WORK(&a->work, async_btree_node_rewrite_work);
- if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
- mutex_lock(&c->pending_node_rewrites_lock);
- list_add(&a->list, &c->pending_node_rewrites);
- mutex_unlock(&c->pending_node_rewrites_lock);
- return;
- }
+ bch2_bkey_buf_init(&a->key);
+ bch2_bkey_buf_copy(&a->key, c, &b->key);
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
- if (test_bit(BCH_FS_started, &c->flags)) {
- bch_err(c, "%s: error getting c->writes ref", __func__);
- kfree(a);
- return;
- }
+ bool now = false, pending = false;
- ret = bch2_fs_read_write_early(c);
- bch_err_msg(c, ret, "going read-write");
- if (ret) {
- kfree(a);
- return;
- }
+ spin_lock(&c->btree_node_rewrites_lock);
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) &&
+ enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
+ list_add(&a->list, &c->btree_node_rewrites);
+ now = true;
+ } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
+ list_add(&a->list, &c->btree_node_rewrites_pending);
+ pending = true;
+ }
+ spin_unlock(&c->btree_node_rewrites_lock);
- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+ if (now) {
+ queue_work(c->btree_node_rewrite_worker, &a->work);
+ } else if (pending) {
+ /* bch2_do_pending_node_rewrites will execute */
+ } else {
+ bch2_bkey_buf_exit(&a->key, c);
+ kfree(a);
}
+}
- queue_work(c->btree_node_rewrite_worker, &a->work);
+void bch2_async_btree_node_rewrites_flush(struct bch_fs *c)
+{
+ closure_wait_event(&c->btree_node_rewrites_wait,
+ list_empty(&c->btree_node_rewrites));
}
void bch2_do_pending_node_rewrites(struct bch_fs *c)
{
- struct async_btree_rewrite *a, *n;
-
- mutex_lock(&c->pending_node_rewrites_lock);
- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
- list_del(&a->list);
+ while (1) {
+ spin_lock(&c->btree_node_rewrites_lock);
+ struct async_btree_rewrite *a =
+ list_pop_entry(&c->btree_node_rewrites_pending,
+ struct async_btree_rewrite, list);
+ if (a)
+ list_add(&a->list, &c->btree_node_rewrites);
+ spin_unlock(&c->btree_node_rewrites_lock);
+
+ if (!a)
+ break;
- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+ enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite);
queue_work(c->btree_node_rewrite_worker, &a->work);
}
- mutex_unlock(&c->pending_node_rewrites_lock);
}
void bch2_free_pending_node_rewrites(struct bch_fs *c)
{
- struct async_btree_rewrite *a, *n;
+ while (1) {
+ spin_lock(&c->btree_node_rewrites_lock);
+ struct async_btree_rewrite *a =
+ list_pop_entry(&c->btree_node_rewrites_pending,
+ struct async_btree_rewrite, list);
+ spin_unlock(&c->btree_node_rewrites_lock);
- mutex_lock(&c->pending_node_rewrites_lock);
- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
- list_del(&a->list);
+ if (!a)
+ break;
+ bch2_bkey_buf_exit(&a->key, c);
kfree(a);
}
- mutex_unlock(&c->pending_node_rewrites_lock);
}
static int __bch2_btree_node_update_key(struct btree_trans *trans,
@@ -2322,17 +2433,17 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bool skip_triggers)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter2 = { NULL };
+ struct btree_iter iter2 = {};
struct btree *parent;
int ret;
if (!skip_triggers) {
ret = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
bkey_i_to_s_c(&b->key),
- BTREE_TRIGGER_TRANSACTIONAL) ?:
+ BTREE_TRIGGER_transactional) ?:
bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
bkey_i_to_s(new_key),
- BTREE_TRIGGER_TRANSACTIONAL);
+ BTREE_TRIGGER_transactional);
if (ret)
return ret;
}
@@ -2346,10 +2457,10 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
parent = btree_node_parent(btree_iter_path(trans, iter), b);
if (parent) {
- bch2_trans_copy_iter(&iter2, iter);
+ bch2_trans_copy_iter(trans, &iter2, iter);
iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
- iter2.flags & BTREE_ITER_INTENT,
+ iter2.flags & BTREE_ITER_intent,
_THIS_IP_);
struct btree_path *path2 = btree_iter_path(trans, &iter2);
@@ -2360,8 +2471,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
trans->paths_sorted = false;
- ret = bch2_btree_iter_traverse(&iter2) ?:
- bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN);
+ ret = bch2_btree_iter_traverse(trans, &iter2) ?:
+ bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun);
if (ret)
goto err;
} else {
@@ -2388,7 +2499,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
if (new_hash) {
mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, new_key);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
@@ -2440,6 +2552,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
}
new_hash = bch2_btree_node_mem_alloc(trans, false);
+ ret = PTR_ERR_OR_ZERO(new_hash);
+ if (ret)
+ goto err;
}
path->intent_ref++;
@@ -2447,14 +2562,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
commit_flags, skip_triggers);
--path->intent_ref;
- if (new_hash) {
- mutex_lock(&c->btree_cache.lock);
- list_move(&new_hash->list, &c->btree_cache.freeable);
- mutex_unlock(&c->btree_cache.lock);
-
- six_unlock_write(&new_hash->c.lock);
- six_unlock_intent(&new_hash->c.lock);
- }
+ if (new_hash)
+ bch2_btree_node_to_freelist(c, new_hash);
+err:
closure_sync(&cl);
bch2_btree_cache_cannibalize_unlock(trans);
return ret;
@@ -2465,31 +2575,15 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
unsigned commit_flags, bool skip_triggers)
{
struct btree_iter iter;
- int ret;
-
- bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
- BTREE_MAX_DEPTH, b->c.level,
- BTREE_ITER_INTENT);
- ret = bch2_btree_iter_traverse(&iter);
+ int ret = get_iter_to_node(trans, &iter, b);
if (ret)
- goto out;
-
- /* has node been freed? */
- if (btree_iter_path(trans, &iter)->l[b->c.level].b != b) {
- /* node has been freed: */
- BUG_ON(!btree_node_dying(b));
- goto out;
- }
-
- BUG_ON(!btree_node_hashed(b));
+ return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
- struct bch_extent_ptr *ptr;
bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
!bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
commit_flags, skip_triggers);
-out:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -2507,7 +2601,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
bch2_btree_set_root_inmem(c, b);
}
-static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level)
+int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id, unsigned level)
{
struct bch_fs *c = trans->c;
struct closure cl;
@@ -2524,6 +2618,10 @@ static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id
b = bch2_btree_node_mem_alloc(trans, false);
bch2_btree_cache_cannibalize_unlock(trans);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ return ret;
+
set_btree_node_fake(b);
set_btree_node_need_rewrite(b);
b->c.level = level;
@@ -2555,17 +2653,19 @@ static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id
void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
{
- bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
+ bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level)));
}
static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
- prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
- (void *) as->ip_started,
- bch2_btree_id_str(as->btree_id),
+ prt_printf(out, "%ps: ", (void *) as->ip_started);
+ bch2_trans_commit_flags_to_text(out, as->flags);
+
+ prt_str(out, " ");
+ bch2_btree_id_to_text(out, as->btree_id);
+ prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
as->update_level_start,
as->update_level_end,
- bch2_watermarks[as->watermark],
bch2_btree_update_modes[as->mode],
as->nodes_written,
closure_nr_remaining(&as->cl),
@@ -2640,8 +2740,33 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
return end;
}
+static void bch2_btree_alloc_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct btree_alloc *a)
+{
+ printbuf_indent_add(out, 2);
+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&a->k));
+ prt_newline(out);
+
+ struct open_bucket *ob;
+ unsigned i;
+ open_bucket_for_each(c, &a->ob, ob, i)
+ bch2_open_bucket_to_text(out, c, ob);
+
+ printbuf_indent_sub(out, 2);
+}
+
+void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ for (unsigned i = 0; i < c->btree_reserve_cache_nr; i++)
+ bch2_btree_alloc_to_text(out, c, &c->btree_reserve_cache[i]);
+}
+
void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
{
+ WARN_ON(!list_empty(&c->btree_node_rewrites));
+ WARN_ON(!list_empty(&c->btree_node_rewrites_pending));
+
if (c->btree_node_rewrite_worker)
destroy_workqueue(c->btree_node_rewrite_worker);
if (c->btree_interior_update_worker)
@@ -2657,8 +2782,9 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
mutex_init(&c->btree_interior_update_lock);
INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
- INIT_LIST_HEAD(&c->pending_node_rewrites);
- mutex_init(&c->pending_node_rewrites_lock);
+ INIT_LIST_HEAD(&c->btree_node_rewrites);
+ INIT_LIST_HEAD(&c->btree_node_rewrites_pending);
+ spin_lock_init(&c->btree_node_rewrites_lock);
}
int bch2_fs_btree_interior_update_init(struct bch_fs *c)