summaryrefslogtreecommitdiff
path: root/fs/bcachefs/disk_accounting.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/disk_accounting.c')
-rw-r--r--fs/bcachefs/disk_accounting.c150
1 files changed, 93 insertions, 57 deletions
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 07eb8fa1b026..b32e91ba8be8 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -79,6 +79,8 @@ static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_
memcpy_u64s_small(acc->v.d, d, nr);
}
+static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos);
+
int bch2_disk_accounting_mod(struct btree_trans *trans,
struct disk_accounting_pos *k,
s64 *d, unsigned nr, bool gc)
@@ -96,9 +98,16 @@ int bch2_disk_accounting_mod(struct btree_trans *trans,
accounting_key_init(&k_i.k, k, d, nr);
- return likely(!gc)
- ? bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k)
- : bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
+ if (unlikely(gc)) {
+ int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
+ if (ret == -BCH_ERR_btree_insert_need_mark_replicas)
+ ret = drop_locks_do(trans,
+ bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?:
+ bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
+ return ret;
+ } else {
+ return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k);
+ }
}
int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
@@ -127,14 +136,15 @@ static inline bool is_zero(char *start, char *end)
#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))
int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
void *end = &acc_k + 1;
int ret = 0;
- bkey_fsck_err_on(bversion_zero(k.k->bversion),
+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) &&
+ bversion_zero(k.k->bversion),
c, accounting_key_version_0,
"accounting key with version=0");
@@ -217,7 +227,8 @@ void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_po
prt_printf(out, "id=%u", k->snapshot.id);
break;
case BCH_DISK_ACCOUNTING_btree:
- prt_printf(out, "btree=%s", bch2_btree_id_str(k->btree.id));
+ prt_str(out, "btree=");
+ bch2_btree_id_to_text(out, k->btree.id);
break;
}
}
@@ -243,10 +254,10 @@ void bch2_accounting_swab(struct bkey_s k)
}
static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r,
- struct disk_accounting_pos acc)
+ struct disk_accounting_pos *acc)
{
- unsafe_memcpy(r, &acc.replicas,
- replicas_entry_bytes(&acc.replicas),
+ unsafe_memcpy(r, &acc->replicas,
+ replicas_entry_bytes(&acc->replicas),
"variable length struct");
}
@@ -257,7 +268,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
switch (acc_k.type) {
case BCH_DISK_ACCOUNTING_replicas:
- __accounting_to_replicas(r, acc_k);
+ __accounting_to_replicas(r, &acc_k);
return true;
default:
return false;
@@ -322,6 +333,14 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
+
+ if (trace_accounting_mem_insert_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_accounting_to_text(&buf, c, a.s_c);
+ trace_accounting_mem_insert(c, buf.buf);
+ printbuf_exit(&buf);
+ }
return 0;
err:
free_percpu(n.v[1]);
@@ -461,32 +480,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
return ret;
}
-void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c)
-{
- struct bch_accounting_mem *acc = &c->accounting;
-
- percpu_down_read(&c->mark_lock);
- out->atomic++;
-
- eytzinger0_for_each(i, acc->k.nr) {
- struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[i].pos);
-
- bch2_accounting_key_to_text(out, &acc_k);
-
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
-
- prt_str(out, ":");
- for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
- prt_printf(out, " %llu", v[j]);
- prt_newline(out);
- }
-
- --out->atomic;
- percpu_up_read(&c->mark_lock);
-}
-
static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc)
{
darray_for_each(acc->k, e) {
@@ -625,7 +618,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
switch (acc.type) {
case BCH_DISK_ACCOUNTING_replicas: {
struct bch_replicas_padded r;
- __accounting_to_replicas(&r.e, acc);
+ __accounting_to_replicas(&r.e, &acc);
for (unsigned i = 0; i < r.e.nr_devs; i++)
if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
@@ -699,11 +692,45 @@ int bch2_accounting_read(struct bch_fs *c)
struct btree_trans *trans = bch2_trans_get(c);
struct printbuf buf = PRINTBUF;
- int ret = for_each_btree_key(trans, iter,
- BTREE_ID_accounting, POS_MIN,
+ /*
+ * We might run more than once if we rewind to start topology repair or
+ * btree node scan - and those might cause us to get different results,
+ * so we can't just skip if we've already run.
+ *
+ * Instead, zero out any accounting we have:
+ */
+ percpu_down_write(&c->mark_lock);
+ darray_for_each(acc->k, e)
+ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
+ for_each_member_device(c, ca)
+ percpu_memset(ca->usage, 0, sizeof(*ca->usage));
+ percpu_memset(c->usage, 0, sizeof(*c->usage));
+ percpu_up_write(&c->mark_lock);
+
+ struct btree_iter iter;
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
+ iter.flags &= ~BTREE_ITER_with_journal;
+ int ret = for_each_btree_key_continue(trans, iter,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
struct bkey u;
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u);
+
+ if (k.k->type != KEY_TYPE_accounting)
+ continue;
+
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, k.k->p);
+
+ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
+ break;
+
+ if (!bch2_accounting_is_mem(acc_k)) {
+ struct disk_accounting_pos next = { .type = acc_k.type + 1 };
+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
+ continue;
+ }
+
accounting_read_key(trans, k);
}));
if (ret)
@@ -715,6 +742,12 @@ int bch2_accounting_read(struct bch_fs *c)
darray_for_each(*keys, i) {
if (i->k->k.type == KEY_TYPE_accounting) {
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, i->k->k.p);
+
+ if (!bch2_accounting_is_mem(acc_k))
+ continue;
+
struct bkey_s_c k = bkey_i_to_s_c(i->k);
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr,
sizeof(acc->k.data[0]),
@@ -748,15 +781,16 @@ int bch2_accounting_read(struct bch_fs *c)
keys->gap = keys->nr = dst - keys->data;
percpu_down_write(&c->mark_lock);
- unsigned i = 0;
- while (i < acc->k.nr) {
- unsigned idx = inorder_to_eytzinger0(i, acc->k.nr);
+ darray_for_each_reverse(acc->k, i) {
struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos);
+ bpos_to_disk_accounting_pos(&acc_k, i->pos);
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false);
+ memset(v, 0, sizeof(v));
+
+ for (unsigned j = 0; j < i->nr_counters; j++)
+ v[j] = percpu_u64_get(i->v[0] + j);
/*
* If the entry counters are zeroed, it should be treated as
@@ -765,26 +799,25 @@ int bch2_accounting_read(struct bch_fs *c)
* Remove it, so that if it's re-added it gets re-marked in the
* superblock:
*/
- ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters)
+ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
? -BCH_ERR_remove_disk_accounting_entry
- : bch2_disk_accounting_validate_late(trans, acc_k,
- v, acc->k.data[idx].nr_counters);
+ : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters);
if (ret == -BCH_ERR_remove_disk_accounting_entry) {
- free_percpu(acc->k.data[idx].v[0]);
- free_percpu(acc->k.data[idx].v[1]);
- darray_remove_item(&acc->k, &acc->k.data[idx]);
- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
- accounting_pos_cmp, NULL);
+ free_percpu(i->v[0]);
+ free_percpu(i->v[1]);
+ darray_remove_item(&acc->k, i);
ret = 0;
continue;
}
if (ret)
goto fsck_err;
- i++;
}
+ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+ accounting_pos_cmp, NULL);
+
preempt_disable();
struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
@@ -804,7 +837,7 @@ int bch2_accounting_read(struct bch_fs *c)
break;
case BCH_DISK_ACCOUNTING_dev_data_type:
rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, k.dev_data_type.dev);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
if (ca) {
struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
percpu_u64_set(&d->buckets, v[0]);
@@ -881,10 +914,13 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
- continue;
+ break;
- if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
+ if (!bch2_accounting_is_mem(acc_k)) {
+ struct disk_accounting_pos next = { .type = acc_k.type + 1 };
+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
continue;
+ }
bch2_accounting_mem_read(c, k.k->p, v, nr);
@@ -910,7 +946,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
break;
case BCH_DISK_ACCOUNTING_dev_data_type: {
rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (!ca) {
rcu_read_unlock();
continue;