summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_trans_commit.c
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-11-09 14:22:46 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2024-07-14 19:00:13 -0400
commit1d16c605cc55ef26f0c65b362665a6c99080ccbc (patch)
treec7f7f6b6209ac290185dbea26a1f9d59661ac95f /fs/bcachefs/btree_trans_commit.c
parent5d9667d1d6eaca3f6cd3c63cd6a0f309147c7f5c (diff)
bcachefs: Disk space accounting rewrite
Main part of the disk accounting rewrite. This is a wholesale rewrite of the existing disk space accounting, which relies on percepu counters that are sharded by journal buffer, and rolled up and added to each journal write. With the new scheme, every set of counters is a distinct key in the accounting btree; this fixes scaling limitations of the old scheme, where counters took up space in each journal entry and required multiple percpu counters. Now, in memory accounting requires a single set of percpu counters - not multiple for each in flight journal buffer - and in the future we'll probably also have counters that don't use in memory percpu counters, they're not strictly required. An accounting update is now a normal btree update, using the btree write buffer path. At transaction commit time, we apply accounting updates to the in memory counters, which are percpu counters indexed in an eytzinger tree by the accounting key. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/btree_trans_commit.c')
-rw-r--r--fs/bcachefs/btree_trans_commit.c65
1 files changed, 50 insertions, 15 deletions
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 05e819174697..92305c12cb75 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -10,6 +10,7 @@
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "buckets.h"
+#include "disk_accounting.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
@@ -620,6 +621,14 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
return 0;
}
+static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
+{
+ return (struct bversion) {
+ .hi = res->seq >> 32,
+ .lo = (res->seq << 32) | (res->offset + offset),
+ };
+}
+
static inline int
bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry **stopped_at,
@@ -628,7 +637,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct bch_fs *c = trans->c;
struct btree_trans_commit_hook *h;
unsigned u64s = 0;
- int ret;
+ int ret = 0;
bch2_trans_verify_not_unlocked(trans);
bch2_trans_verify_not_in_restart(trans);
@@ -693,21 +702,38 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
i->k->k.version = MAX_VERSION;
}
- if (trans->fs_usage_deltas &&
- bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
- return -BCH_ERR_btree_insert_need_mark_replicas;
-
- /* XXX: we only want to run this if deltas are nonzero */
- bch2_trans_account_disk_usage_change(trans);
-
h = trans->hooks;
while (h) {
ret = h->fn(trans, h);
if (ret)
- goto revert_fs_usage;
+ return ret;
h = h->next;
}
+ struct jset_entry *entry = trans->journal_entries;
+
+ if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
+ percpu_down_read(&c->mark_lock);
+
+ for (entry = trans->journal_entries;
+ entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ entry = vstruct_next(entry))
+ if (jset_entry_is_key(entry) && entry->start->k.type == KEY_TYPE_accounting) {
+ struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
+
+ a->k.version = journal_pos_to_bversion(&trans->journal_res,
+ (u64 *) entry - (u64 *) trans->journal_entries);
+ BUG_ON(bversion_zero(a->k.version));
+ ret = bch2_accounting_mem_mod(trans, accounting_i_to_s_c(a));
+ if (ret)
+ goto revert_fs_usage;
+ }
+ percpu_up_read(&c->mark_lock);
+
+ /* XXX: we only want to run this if deltas are nonzero */
+ bch2_trans_account_disk_usage_change(trans);
+ }
+
trans_for_each_update(trans, i)
if (BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS & (1U << i->bkey_type)) {
ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_atomic|i->flags);
@@ -776,10 +802,20 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
return 0;
fatal_err:
- bch2_fatal_error(c);
+ bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
+ percpu_down_read(&c->mark_lock);
revert_fs_usage:
- if (trans->fs_usage_deltas)
- bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas);
+ for (struct jset_entry *entry2 = trans->journal_entries;
+ entry2 != entry;
+ entry2 = vstruct_next(entry2))
+ if (jset_entry_is_key(entry2) && entry2->start->k.type == KEY_TYPE_accounting) {
+ struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);
+
+ bch2_accounting_neg(a);
+ bch2_accounting_mem_mod(trans, a.c);
+ bch2_accounting_neg(a);
+ }
+ percpu_up_read(&c->mark_lock);
return ret;
}
@@ -929,7 +965,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
break;
case -BCH_ERR_btree_insert_need_mark_replicas:
ret = drop_locks_do(trans,
- bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas));
+ bch2_accounting_update_sb(trans));
break;
case -BCH_ERR_journal_res_get_blocked:
/*
@@ -1033,8 +1069,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
!trans->journal_entries_u64s)
goto out_reset;
- memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
-
ret = bch2_trans_commit_run_triggers(trans);
if (ret)
goto out_reset;
@@ -1131,6 +1165,7 @@ retry:
bch2_trans_verify_not_in_restart(trans);
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+ memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_);