diff options
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- | fs/bcachefs/alloc_background.c | 247 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 48 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 56 | ||||
-rw-r--r-- | fs/bcachefs/bkey.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 280 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/ec.c | 35 | ||||
-rw-r--r-- | fs/bcachefs/extents.c | 21 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 11 |
9 files changed, 403 insertions, 299 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 68fa6caf022d..9a670bb2ccfb 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -15,6 +15,7 @@ #include "error.h" #include "recovery.h" #include "trace.h" +#include "varint.h" #include <linux/kthread.h> #include <linux/math64.h> @@ -24,11 +25,10 @@ #include <linux/sched/task.h> #include <linux/sort.h> -static const char * const bch2_alloc_field_names[] = { -#define x(name, bytes) #name, - BCH_ALLOC_FIELDS() +static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { +#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8, + BCH_ALLOC_FIELDS_V1() #undef x - NULL }; static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int); @@ -67,10 +67,10 @@ static void pd_controllers_update(struct work_struct *work) /* Persistent alloc info: */ -static inline u64 get_alloc_field(const struct bch_alloc *a, - const void **p, unsigned field) +static inline u64 alloc_field_v1_get(const struct bch_alloc *a, + const void **p, unsigned field) { - unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; + unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field]; u64 v; if (!(a->fields & (1 << field))) @@ -97,10 +97,10 @@ static inline u64 get_alloc_field(const struct bch_alloc *a, return v; } -static inline void put_alloc_field(struct bkey_i_alloc *a, void **p, - unsigned field, u64 v) +static inline void alloc_field_v1_put(struct bkey_i_alloc *a, void **p, + unsigned field, u64 v) { - unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; + unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field]; if (!v) return; @@ -127,55 +127,149 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p, *p += bytes; } -struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) +static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out, + struct bkey_s_c k) { - struct bkey_alloc_unpacked ret = { .gen = 0 }; + const struct bch_alloc *in = bkey_s_c_to_alloc(k).v; + const void *d = in->data; + unsigned idx = 0; - if (k.k->type == KEY_TYPE_alloc) { - const struct bch_alloc *a = bkey_s_c_to_alloc(k).v; - const void *d = a->data; - unsigned idx = 0; + out->gen = in->gen; + +#define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++); + BCH_ALLOC_FIELDS_V1() +#undef x +} + +static void bch2_alloc_pack_v1(struct bkey_alloc_buf *dst, + const struct bkey_alloc_unpacked src) +{ + struct bkey_i_alloc *a = bkey_alloc_init(&dst->k); + void *d = a->v.data; + unsigned bytes, idx = 0; - ret.gen = a->gen; + a->k.p = POS(src.dev, src.bucket); + a->v.fields = 0; + a->v.gen = src.gen; -#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++); - BCH_ALLOC_FIELDS() +#define x(_name, _bits) alloc_field_v1_put(a, &d, idx++, src._name); + BCH_ALLOC_FIELDS_V1() #undef x - } - return ret; + bytes = (void *) d - (void *) &a->v; + set_bkey_val_bytes(&a->k, bytes); + memset_u64s_tail(&a->v, 0, bytes); } -void bch2_alloc_pack(struct bkey_i_alloc *dst, - const struct bkey_alloc_unpacked src) +static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out, + struct bkey_s_c k) { - unsigned idx = 0; - void *d = dst->v.data; + struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k); + const u8 *in = a.v->data; + const u8 *end = bkey_val_end(a); + unsigned fieldnr = 0; + int ret; + u64 v; + + out->gen = a.v->gen; + out->oldest_gen = a.v->oldest_gen; + out->data_type = a.v->data_type; + +#define x(_name, _bits) \ + if (fieldnr < a.v->nr_fields) { \ + ret = bch2_varint_decode(in, end, &v); \ + if (ret < 0) \ + return ret; \ + in += ret; \ + } else { \ + v = 0; \ + } \ + out->_name = v; \ + if (v != out->_name) \ + return -1; \ + fieldnr++; + + BCH_ALLOC_FIELDS_V2() +#undef x + return 0; +} + +static void bch2_alloc_pack_v2(struct bkey_alloc_buf *dst, + const struct bkey_alloc_unpacked src) +{ + struct bkey_i_alloc_v2 *a = bkey_alloc_v2_init(&dst->k); + unsigned nr_fields = 0, last_nonzero_fieldnr = 0; + u8 *out = a->v.data; + u8 *end = (void *) &dst[1]; + u8 *last_nonzero_field = out; unsigned bytes; - dst->v.fields = 0; - dst->v.gen = src.gen; + a->k.p = POS(src.dev, src.bucket); + a->v.gen = src.gen; + a->v.oldest_gen = src.oldest_gen; + a->v.data_type = src.data_type; + +#define x(_name, _bits) \ + nr_fields++; \ + \ + if (src._name) { \ + out += bch2_varint_encode(out, src._name); \ + \ + last_nonzero_field = out; \ + last_nonzero_fieldnr = nr_fields; \ + } else { \ + *out++ = 0; \ + } -#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name); - BCH_ALLOC_FIELDS() + BCH_ALLOC_FIELDS_V2() #undef x + BUG_ON(out > end); + + out = last_nonzero_field; + a->v.nr_fields = last_nonzero_fieldnr; + + bytes = (u8 *) out - (u8 *) &a->v; + set_bkey_val_bytes(&a->k, bytes); + memset_u64s_tail(&a->v, 0, bytes); +} + +struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) +{ + struct bkey_alloc_unpacked ret = { + .dev = k.k->p.inode, + .bucket = k.k->p.offset, + .gen = 0, + }; - bytes = (void *) d - (void *) &dst->v; - set_bkey_val_bytes(&dst->k, bytes); - memset_u64s_tail(&dst->v, 0, bytes); + if (k.k->type == KEY_TYPE_alloc_v2) + bch2_alloc_unpack_v2(&ret, k); + else if (k.k->type == KEY_TYPE_alloc) + bch2_alloc_unpack_v1(&ret, k); + + return ret; +} + +void bch2_alloc_pack(struct bch_fs *c, + struct bkey_alloc_buf *dst, + const struct bkey_alloc_unpacked src) +{ + if (c->sb.features & (1ULL << BCH_FEATURE_alloc_v2)) + bch2_alloc_pack_v2(dst, src); + else + bch2_alloc_pack_v1(dst, src); } static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) { unsigned i, bytes = offsetof(struct bch_alloc, data); - for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_FIELD_BYTES); i++) + for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++) if (a->fields & (1 << i)) - bytes += BCH_ALLOC_FIELD_BYTES[i]; + bytes += BCH_ALLOC_V1_FIELD_BYTES[i]; return DIV_ROUND_UP(bytes, sizeof(u64)); } -const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) +const char *bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); @@ -190,20 +284,30 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) return NULL; } -void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) +const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - const void *d = a.v->data; - unsigned i; + struct bkey_alloc_unpacked u; + + if (k.k->p.inode >= c->sb.nr_devices || + !c->devs[k.k->p.inode]) + return "invalid device"; - pr_buf(out, "gen %u", a.v->gen); + if (bch2_alloc_unpack_v2(&u, k)) + return "unpack error"; - for (i = 0; i < BCH_ALLOC_FIELD_NR; i++) - if (a.v->fields & (1 << i)) - pr_buf(out, " %s %llu", - bch2_alloc_field_names[i], - get_alloc_field(a.v, &d, i)); + return NULL; +} + +void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); + + pr_buf(out, "gen %u oldest_gen %u data_type %u", + u.gen, u.oldest_gen, u.data_type); +#define x(_name, ...) pr_buf(out, #_name " %llu ", (u64) u._name); + BCH_ALLOC_FIELDS_V2() +#undef x } static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id, @@ -213,7 +317,9 @@ static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id, struct bucket *g; struct bkey_alloc_unpacked u; - if (level || k.k->type != KEY_TYPE_alloc) + if (level || + (k.k->type != KEY_TYPE_alloc && + k.k->type != KEY_TYPE_alloc_v2)) return 0; ca = bch_dev_bkey_exists(c, k.k->p.inode); @@ -281,8 +387,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, struct bucket *g; struct bucket_mark m; struct bkey_alloc_unpacked old_u, new_u; - __BKEY_PADDED(k, 8) alloc_key; /* hack: */ - struct bkey_i_alloc *a; + struct bkey_alloc_buf a; int ret; retry: bch2_trans_begin(trans); @@ -303,17 +408,14 @@ retry: ca = bch_dev_bkey_exists(c, iter->pos.inode); g = bucket(ca, iter->pos.offset); m = READ_ONCE(g->mark); - new_u = alloc_mem_to_key(g, m); + new_u = alloc_mem_to_key(iter, g, m); percpu_up_read(&c->mark_lock); if (!bkey_alloc_unpacked_cmp(old_u, new_u)) return 0; - a = bkey_alloc_init(&alloc_key.k); - a->k.p = iter->pos; - bch2_alloc_pack(a, new_u); - - bch2_trans_update(trans, iter, &a->k_i, + bch2_alloc_pack(c, &a, new_u); + bch2_trans_update(trans, iter, &a.k, BTREE_TRIGGER_NORUN); ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL|flags); @@ -473,9 +575,9 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, struct bch_dev *ca = bch_dev_bkey_exists(c, dev); struct btree_iter *iter; struct bucket *g; - struct bkey_i_alloc *a; + struct bkey_alloc_buf *a; struct bkey_alloc_unpacked u; - u16 *time; + u64 *time; int ret = 0; iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr), @@ -486,28 +588,24 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, if (ret) goto out; - a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); + a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); ret = PTR_ERR_OR_ZERO(a); if (ret) goto out; percpu_down_read(&c->mark_lock); g = bucket(ca, bucket_nr); - u = alloc_mem_to_key(g, READ_ONCE(g->mark)); + u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); - bkey_alloc_init(&a->k_i); - a->k.p = iter->pos; - time = rw == READ ? &u.read_time : &u.write_time; if (*time == c->bucket_clock[rw].hand) goto out; *time = c->bucket_clock[rw].hand; - bch2_alloc_pack(a, u); - - ret = bch2_trans_update(trans, iter, &a->k_i, 0) ?: + bch2_alloc_pack(c, a, u); + ret = bch2_trans_update(trans, iter, &a->k, 0) ?: bch2_trans_commit(trans, NULL, NULL, 0); out: bch2_trans_iter_put(trans, iter); @@ -863,14 +961,8 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, struct btree_iter *iter, u64 *journal_seq, unsigned flags) { -#if 0 - __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key; -#else - /* hack: */ - __BKEY_PADDED(k, 8) alloc_key; -#endif struct bch_fs *c = trans->c; - struct bkey_i_alloc *a; + struct bkey_alloc_buf a; struct bkey_alloc_unpacked u; struct bucket *g; struct bucket_mark m; @@ -920,8 +1012,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, goto out; } - BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); - bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); retry: ret = bch2_btree_iter_traverse(iter); @@ -931,7 +1021,7 @@ retry: percpu_down_read(&c->mark_lock); g = bucket(ca, iter->pos.offset); m = READ_ONCE(g->mark); - u = alloc_mem_to_key(g, m); + u = alloc_mem_to_key(iter, g, m); percpu_up_read(&c->mark_lock); @@ -944,11 +1034,8 @@ retry: u.read_time = c->bucket_clock[READ].hand; u.write_time = c->bucket_clock[WRITE].hand; - a = bkey_alloc_init(&alloc_key.k); - a->k.p = iter->pos; - bch2_alloc_pack(a, u); - - bch2_trans_update(trans, iter, &a->k_i, + bch2_alloc_pack(c, &a, u); + bch2_trans_update(trans, iter, &a.k, BTREE_TRIGGER_BUCKET_INVALIDATE); /* diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index f60fcebff2ce..6fededcd9f86 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -7,12 +7,33 @@ #include "debug.h" struct bkey_alloc_unpacked { + u64 bucket; + u8 dev; u8 gen; + u8 oldest_gen; + u8 data_type; #define x(_name, _bits) u##_bits _name; - BCH_ALLOC_FIELDS() + BCH_ALLOC_FIELDS_V2() #undef x }; +struct bkey_alloc_buf { + struct bkey_i k; + + union { + struct { +#define x(_name, _bits) + _bits / 8 + u8 _pad[8 + BCH_ALLOC_FIELDS_V1()]; +#undef x + } _v1; + struct { +#define x(_name, _bits) + 8 + _bits / 8 + u8 _pad[8 + BCH_ALLOC_FIELDS_V2()]; +#undef x + } _v2; + }; +} __attribute__((packed, aligned(8))); + /* How out of date a pointer gen is allowed to be: */ #define BUCKET_GC_GEN_MAX 96U @@ -20,23 +41,28 @@ struct bkey_alloc_unpacked { static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, struct bkey_alloc_unpacked r) { - return l.gen != r.gen -#define x(_name, _bits) || l._name != r._name - BCH_ALLOC_FIELDS() + return l.gen != r.gen || + l.oldest_gen != r.oldest_gen || + l.data_type != r.data_type +#define x(_name, ...) || l._name != r._name + BCH_ALLOC_FIELDS_V2() #undef x ; } struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); -void bch2_alloc_pack(struct bkey_i_alloc *, +void bch2_alloc_pack(struct bch_fs *, struct bkey_alloc_buf *, const struct bkey_alloc_unpacked); int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); static inline struct bkey_alloc_unpacked -alloc_mem_to_key(struct bucket *g, struct bucket_mark m) +alloc_mem_to_key(struct btree_iter *iter, + struct bucket *g, struct bucket_mark m) { return (struct bkey_alloc_unpacked) { + .dev = iter->pos.inode, + .bucket = iter->pos.offset, .gen = m.gen, .oldest_gen = g->oldest_gen, .data_type = m.data_type, @@ -49,11 +75,17 @@ alloc_mem_to_key(struct bucket *g, struct bucket_mark m) #define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) -const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); +const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c); +const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc (struct bkey_ops) { \ - .key_invalid = bch2_alloc_invalid, \ + .key_invalid = bch2_alloc_v1_invalid, \ + .val_to_text = bch2_alloc_to_text, \ +} + +#define bch2_bkey_ops_alloc_v2 (struct bkey_ops) { \ + .key_invalid = bch2_alloc_v2_invalid, \ .val_to_text = bch2_alloc_to_text, \ } diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 77af77efdd6d..b6c7e57b6bcd 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -345,7 +345,8 @@ static inline void bkey_init(struct bkey *k) x(reflink_v, 16) \ x(inline_data, 17) \ x(btree_ptr_v2, 18) \ - x(indirect_inline_data, 19) + x(indirect_inline_data, 19) \ + x(alloc_v2, 20) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -555,9 +556,11 @@ struct bch_extent_stripe_ptr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:5, block:8, - idx:51; + redundancy:4, + idx:47; #elif defined (__BIG_ENDIAN_BITFIELD) - __u64 idx:51, + __u64 idx:47, + redundancy:4, block:8, type:5; #endif @@ -803,35 +806,40 @@ struct bch_alloc { __u8 data[]; } __attribute__((packed, aligned(8))); -#define BCH_ALLOC_FIELDS() \ +#define BCH_ALLOC_FIELDS_V1() \ x(read_time, 16) \ x(write_time, 16) \ x(data_type, 8) \ x(dirty_sectors, 16) \ x(cached_sectors, 16) \ - x(oldest_gen, 8) + x(oldest_gen, 8) \ + x(stripe, 32) \ + x(stripe_redundancy, 8) + +struct bch_alloc_v2 { + struct bch_val v; + __u8 nr_fields; + __u8 gen; + __u8 oldest_gen; + __u8 data_type; + __u8 data[]; +} __attribute__((packed, aligned(8))); + +#define BCH_ALLOC_FIELDS_V2() \ + x(read_time, 64) \ + x(write_time, 64) \ + x(dirty_sectors, 16) \ + x(cached_sectors, 16) \ + x(stripe, 32) \ + x(stripe_redundancy, 8) enum { -#define x(name, bytes) BCH_ALLOC_FIELD_##name, - BCH_ALLOC_FIELDS() +#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, + BCH_ALLOC_FIELDS_V1() #undef x BCH_ALLOC_FIELD_NR }; -static const unsigned BCH_ALLOC_FIELD_BYTES[] = { -#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8, - BCH_ALLOC_FIELDS() -#undef x -}; - -#define x(name, bits) + (bits / 8) -static const unsigned BKEY_ALLOC_VAL_U64s_MAX = - DIV_ROUND_UP(offsetof(struct bch_alloc, data) - BCH_ALLOC_FIELDS(), sizeof(u64)); -#undef x - -#define BKEY_ALLOC_U64s_MAX (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX) - /* Quotas: */ enum quota_types { @@ -1337,7 +1345,8 @@ LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); x(btree_updates_journalled, 13) \ x(reflink_inline_data, 14) \ x(new_varint, 15) \ - x(journal_no_flush, 16) + x(journal_no_flush, 16) \ + x(alloc_v2, 17) #define BCH_SB_FEATURES_ALL \ ((1ULL << BCH_FEATURE_new_siphash)| \ @@ -1345,7 +1354,8 @@ LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); (1ULL << BCH_FEATURE_btree_ptr_v2)| \ (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ (1ULL << BCH_FEATURE_new_varint)| \ - (1ULL << BCH_FEATURE_journal_no_flush)) + (1ULL << BCH_FEATURE_journal_no_flush)| \ + (1ULL << BCH_FEATURE_alloc_v2)) enum bch_sb_feature { #define x(f, n) BCH_FEATURE_##f, diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index f984064f4b5d..9fd752b5c2f5 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -538,6 +538,7 @@ BKEY_VAL_ACCESSORS(reflink_v); BKEY_VAL_ACCESSORS(inline_data); BKEY_VAL_ACCESSORS(btree_ptr_v2); BKEY_VAL_ACCESSORS(indirect_inline_data); +BKEY_VAL_ACCESSORS(alloc_v2); /* byte order helpers */ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 1be527ab1416..7b60e988df83 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -688,7 +688,8 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bucket_mark old_m, m; /* We don't do anything for deletions - do we?: */ - if (new.k->type != KEY_TYPE_alloc) + if (new.k->type != KEY_TYPE_alloc && + new.k->type != KEY_TYPE_alloc_v2) return 0; /* @@ -711,6 +712,7 @@ static int bch2_mark_alloc(struct bch_fs *c, m.data_type = u.data_type; m.dirty_sectors = u.dirty_sectors; m.cached_sectors = u.cached_sectors; + m.stripe = u.stripe != 0; if (journal_seq) { m.journal_seq_valid = 1; @@ -724,6 +726,8 @@ static int bch2_mark_alloc(struct bch_fs *c, g->io_time[WRITE] = u.write_time; g->oldest_gen = u.oldest_gen; g->gen_valid = 1; + g->stripe = u.stripe; + g->stripe_redundancy = u.stripe_redundancy; /* * need to know if we're getting called from the invalidate path or @@ -918,11 +922,10 @@ static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, return 0; } -static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, +static int mark_stripe_bucket(struct bch_fs *c, struct bkey_s_c k, unsigned ptr_idx, struct bch_fs_usage *fs_usage, - u64 journal_seq, unsigned flags, - bool enabled) + u64 journal_seq, unsigned flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; unsigned nr_data = s->nr_blocks - s->nr_redundant; @@ -935,8 +938,13 @@ static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, char buf[200]; int ret; - if (enabled) - g->ec_redundancy = s->nr_redundant; + if (g->stripe && g->stripe != k.k->p.offset) { + bch2_fs_inconsistent(c, + "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + return -EINVAL; + } old = bucket_cmpxchg(g, new, ({ ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type, @@ -944,23 +952,9 @@ static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, if (ret) return ret; - if (new.stripe && enabled) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", - ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - - if (!new.stripe && !enabled) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "bucket %u:%zu gen %u: deleting stripe but not marked\n%s", - ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - - new.stripe = enabled; - - if ((flags & BTREE_TRIGGER_GC) && parity) { - new.data_type = enabled ? BCH_DATA_parity : 0; - new.dirty_sectors = enabled ? le16_to_cpu(s->sectors): 0; + if (parity) { + new.data_type = BCH_DATA_parity; + new.dirty_sectors = le16_to_cpu(s->sectors); } if (journal_seq) { @@ -969,8 +963,8 @@ static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, } })); - if (!enabled) - g->ec_redundancy = 0; + g->stripe = k.k->p.offset; + g->stripe_redundancy = s->nr_redundant; bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); return 0; @@ -1166,6 +1160,8 @@ static int bch2_mark_stripe(struct bch_fs *c, unsigned i; int ret; + BUG_ON(gc && old_s); + if (!m || (old_s && !m->alive)) { bch_err_ratelimited(c, "error marking nonexistent stripe %zu", idx); @@ -1173,48 +1169,12 @@ static int bch2_mark_stripe(struct bch_fs *c, } if (!new_s) { - /* Deleting: */ - for (i = 0; i < old_s->nr_blocks; i++) { - ret = bucket_set_stripe(c, old, i, fs_usage, - journal_seq, flags, false); - if (ret) - return ret; - } - - if (!gc && m->on_heap) { - spin_lock(&c->ec_stripes_heap_lock); - bch2_stripes_heap_del(c, m, idx); - spin_unlock(&c->ec_stripes_heap_lock); - } - - if (gc) - update_replicas(c, fs_usage, &m->r.e, - -((s64) m->sectors * m->nr_redundant)); + spin_lock(&c->ec_stripes_heap_lock); + bch2_stripes_heap_del(c, m, idx); + spin_unlock(&c->ec_stripes_heap_lock); memset(m, 0, sizeof(*m)); } else { - BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks); - BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant); - - for (i = 0; i < new_s->nr_blocks; i++) { - if (!old_s || - memcmp(new_s->ptrs + i, - old_s->ptrs + i, - sizeof(struct bch_extent_ptr))) { - - if (old_s) { - bucket_set_stripe(c, old, i, fs_usage, - journal_seq, flags, false); - if (ret) - return ret; - } - ret = bucket_set_stripe(c, new, i, fs_usage, - journal_seq, flags, true); - if (ret) - return ret; - } - } - m->alive = true; m->sectors = le16_to_cpu(new_s->sectors); m->algorithm = new_s->algorithm; @@ -1223,27 +1183,13 @@ static int bch2_mark_stripe(struct bch_fs *c, m->blocks_nonempty = 0; for (i = 0; i < new_s->nr_blocks; i++) { - unsigned s = stripe_blockcount_get(new_s, i); - - /* - * gc recalculates this field from stripe ptr - * references: - */ - if (!gc) - m->block_sectors[i] = s; - m->blocks_nonempty += !!s; + m->block_sectors[i] = + stripe_blockcount_get(new_s, i); + m->blocks_nonempty += !!m->block_sectors[i]; } - if (gc && old_s) - update_replicas(c, fs_usage, &m->r.e, - -((s64) m->sectors * m->nr_redundant)); - bch2_bkey_to_replicas(&m->r.e, new); - if (gc) - update_replicas(c, fs_usage, &m->r.e, - ((s64) m->sectors * m->nr_redundant)); - if (!gc) { spin_lock(&c->ec_stripes_heap_lock); bch2_stripes_heap_update(c, m, idx); @@ -1251,6 +1197,25 @@ static int bch2_mark_stripe(struct bch_fs *c, } } + if (gc) { + /* + * gc recalculates this field from stripe ptr + * references: + */ + memset(m->block_sectors, 0, sizeof(m->block_sectors)); + m->blocks_nonempty = 0; + + for (i = 0; i < new_s->nr_blocks; i++) { + ret = mark_stripe_bucket(c, new, i, fs_usage, + journal_seq, flags); + if (ret) + return ret; + } + + update_replicas(c, fs_usage, &m->r.e, + ((s64) m->sectors * m->nr_redundant)); + } + return 0; } @@ -1274,6 +1239,7 @@ static int bch2_mark_key_locked(struct bch_fs *c, switch (k.k->type) { case KEY_TYPE_alloc: + case KEY_TYPE_alloc_v2: ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags); break; case KEY_TYPE_btree_ptr: @@ -1542,9 +1508,10 @@ static int trans_get_key(struct btree_trans *trans, return ret; } -static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, - const struct bch_extent_ptr *ptr, - struct bkey_alloc_unpacked *u) +static struct bkey_alloc_buf * +bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, + const struct bch_extent_ptr *ptr, + struct bkey_alloc_unpacked *u) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); @@ -1552,8 +1519,13 @@ static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree struct bucket *g; struct btree_iter *iter; struct bkey_s_c k; + struct bkey_alloc_buf *a; int ret; + a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); + if (IS_ERR(a)) + return a; + iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k); if (iter) { *u = bch2_alloc_unpack(k); @@ -1565,17 +1537,17 @@ static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree ret = bch2_btree_iter_traverse(iter); if (ret) { bch2_trans_iter_put(trans, iter); - return ret; + return ERR_PTR(ret); } percpu_down_read(&c->mark_lock); g = bucket(ca, pos.offset); - *u = alloc_mem_to_key(g, READ_ONCE(g->mark)); + *u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); } *_iter = iter; - return 0; + return a; } static int bch2_trans_mark_pointer(struct btree_trans *trans, @@ -1585,27 +1557,20 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter *iter; struct bkey_alloc_unpacked u; - struct bkey_i_alloc *a; + struct bkey_alloc_buf *a; int ret; - ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); - if (ret) - return ret; + a = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); + if (IS_ERR(a)) + return PTR_ERR(a); ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type, &u.dirty_sectors, &u.cached_sectors); if (ret) goto out; - a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - goto out; - - bkey_alloc_init(&a->k_i); - a->k.p = iter->pos; - bch2_alloc_pack(a, u); - bch2_trans_update(trans, iter, &a->k_i, 0); + bch2_alloc_pack(c, a, u); + bch2_trans_update(trans, iter, &a->k, 0); out: bch2_trans_iter_put(trans, iter); return ret; @@ -1716,34 +1681,51 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, } static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, - const struct bch_extent_ptr *ptr, - s64 sectors, bool parity) + struct bkey_s_c_stripe s, + unsigned idx, bool deleting) { - struct bkey_i_alloc *a; + struct bch_fs *c = trans->c; + const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; + struct bkey_alloc_buf *a; struct btree_iter *iter; struct bkey_alloc_unpacked u; - int ret; + bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant; + int ret = 0; - ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); - if (ret) - return ret; + a = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); + if (IS_ERR(a)) + return PTR_ERR(a); if (parity) { + s64 sectors = le16_to_cpu(s.v->sectors); + + if (deleting) + sectors = -sectors; + u.dirty_sectors += sectors; u.data_type = u.dirty_sectors ? BCH_DATA_parity : 0; } - a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - goto err; + if (!deleting) { + if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c, + "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)", + iter->pos.inode, iter->pos.offset, u.gen, + u.stripe, s.k->p.offset)) { + ret = -EIO; + goto err; + } - bkey_alloc_init(&a->k_i); - a->k.p = iter->pos; - bch2_alloc_pack(a, u); - bch2_trans_update(trans, iter, &a->k_i, 0); + u.stripe = s.k->p.offset; + u.stripe_redundancy = s.v->nr_redundant; + } else { + u.stripe = 0; + u.stripe_redundancy = 0; + } + + bch2_alloc_pack(c, a, u); + bch2_trans_update(trans, iter, &a->k, 0); err: bch2_trans_iter_put(trans, iter); return ret; @@ -1753,51 +1735,50 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s_c new, unsigned flags) { - const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe - ? bkey_s_c_to_stripe(old).v : NULL; - const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe - ? bkey_s_c_to_stripe(new).v : NULL; + struct bkey_s_c_stripe old_s = { NULL }; + struct bkey_s_c_stripe new_s = { NULL }; struct bch_replicas_padded r; unsigned i; int ret = 0; + if (old.k->type == KEY_TYPE_stripe) + old_s = bkey_s_c_to_stripe(old); + if (new.k->type == KEY_TYPE_stripe) + new_s = bkey_s_c_to_stripe(new); + /* * If the pointers aren't changing, we don't need to do anything: */ - if (new_s && old_s && - !memcmp(old_s->ptrs, new_s->ptrs, - new_s->nr_blocks * sizeof(struct bch_extent_ptr))) + if (new_s.k && old_s.k && + new_s.v->nr_blocks == old_s.v->nr_blocks && + new_s.v->nr_redundant == old_s.v->nr_redundant && + !memcmp(old_s.v->ptrs, new_s.v->ptrs, + new_s.v->nr_blocks * sizeof(struct bch_extent_ptr))) return 0; - if (new_s) { - unsigned nr_data = new_s->nr_blocks - new_s->nr_redundant; - s64 sectors = le16_to_cpu(new_s->sectors); + if (new_s.k) { + s64 sectors = le16_to_cpu(new_s.v->sectors); bch2_bkey_to_replicas(&r.e, new); - update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + update_replicas_list(trans, &r.e, sectors * new_s.v->nr_redundant); - for (i = 0; i < new_s->nr_blocks; i++) { - bool parity = i >= nr_data; - - ret = bch2_trans_mark_stripe_alloc_ref(trans, - &new_s->ptrs[i], sectors, parity); + for (i = 0; i < new_s.v->nr_blocks; i++) { + ret = bch2_trans_mark_stripe_alloc_ref(trans, new_s, + i, false); if (ret) return ret; } } - if (old_s) { - unsigned nr_data = old_s->nr_blocks - old_s->nr_redundant; - s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); + if (old_s.k) { + s64 sectors = -((s64) le16_to_cpu(old_s.v->sectors)); bch2_bkey_to_replicas(&r.e, old); - update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + update_replicas_list(trans, &r.e, sectors * old_s.v->nr_redundant); - for (i = 0; i < old_s->nr_blocks; i++) { - bool parity = i >= nr_data; - - ret = bch2_trans_mark_stripe_alloc_ref(trans, - &old_s->ptrs[i], sectors, parity); + for (i = 0; i < old_s.v->nr_blocks; i++) { + ret = bch2_trans_mark_stripe_alloc_ref(trans, old_s, + i, true); if (ret) return ret; } @@ -2068,21 +2049,16 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter *iter; struct bkey_alloc_unpacked u; - struct bkey_i_alloc *a; + struct bkey_alloc_buf *a; struct bch_extent_ptr ptr = { .dev = ca->dev_idx, .offset = bucket_to_sector(ca, b), }; int ret = 0; - a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - return ret; - - ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); - if (ret) - return ret; + a = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); + if (IS_ERR(a)) + return PTR_ERR(a); if (u.data_type && u.data_type != type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, @@ -2115,10 +2091,8 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, u.data_type = type; u.dirty_sectors = sectors; - bkey_alloc_init(&a->k_i); - a->k.p = iter->pos; - bch2_alloc_pack(a, u); - bch2_trans_update(trans, iter, &a->k_i, 0); + bch2_alloc_pack(c, a, u); + bch2_trans_update(trans, iter, &a->k, 0); out: bch2_trans_iter_put(trans, iter); return ret; diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 98b6c18ca2e8..99ab9f48ba9d 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -41,7 +41,8 @@ struct bucket { u8 oldest_gen; u8 gc_gen; unsigned gen_valid:1; - u8 ec_redundancy; + u8 stripe_redundancy; + u32 stripe; }; struct bucket_array { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 78bea3e5fa9a..a32d399e5b6f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,6 +105,9 @@ const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + if (!bkey_cmp(k.k->p, POS_MIN)) + return "stripe at pos 0"; + if (k.k->p.inode) return "invalid stripe key"; @@ -279,10 +282,14 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { + char buf2[200]; + + bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&buf->key.k_i)); + bch_err_ratelimited(c, - "stripe checksum error at %u:%u: csum type %u, expected %llx got %llx", - i, j, v->csum_type, - want.lo, got.lo); + "stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s", + (void *) _RET_IP_, i, j, v->csum_type, + want.lo, got.lo, buf2); clear_bit(i, buf->valid); break; } @@ -335,6 +342,8 @@ static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) static void ec_block_endio(struct bio *bio) { struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio); + struct bch_stripe *v = &ec_bio->buf->key.v; + struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx]; struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; @@ -343,6 +352,13 @@ static void ec_block_endio(struct bio *bio) bch2_blk_status_to_str(bio->bi_status))) clear_bit(ec_bio->idx, ec_bio->buf->valid); + if (ptr_stale(ca, ptr)) { + bch_err_ratelimited(ca->fs, + "error %s stripe: stale pointer after io", + bio_data_dir(bio) == READ ? "reading from" : "writing to"); + clear_bit(ec_bio->idx, ec_bio->buf->valid); + } + bio_put(&ec_bio->bio); percpu_ref_put(&ca->io_ref); closure_put(cl); @@ -652,7 +668,6 @@ void bch2_stripes_heap_update(struct bch_fs *c, static int ec_stripe_delete(struct bch_fs *c, size_t idx) { - //pr_info("deleting stripe %zu", idx); return bch2_btree_delete_range(c, BTREE_ID_EC, POS(0, idx), POS(0, idx + 1), @@ -795,6 +810,7 @@ static void extent_stripe_ptr_add(struct bkey_s_extent e, *dst = (struct bch_extent_stripe_ptr) { .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr, .block = block, + .redundancy = s->key.v.nr_redundant, .idx = s->key.k.p.offset, }; } @@ -1054,8 +1070,6 @@ void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, if (!ob) return; - //pr_info("adding backpointer at %llu:%llu", pos.inode, pos.offset); - ec = ob->ec; mutex_lock(&ec->lock); @@ -1348,12 +1362,14 @@ static s64 get_existing_stripe(struct bch_fs *c, struct stripe *m; size_t heap_idx; u64 stripe_idx; + s64 ret = -1; if (may_create_new_stripe(c)) return -1; spin_lock(&c->ec_stripes_heap_lock); for (heap_idx = 0; heap_idx < h->used; heap_idx++) { + /* No blocks worth reusing, stripe will just be deleted: */ if (!h->data[heap_idx].blocks_nonempty) continue; @@ -1365,13 +1381,12 @@ static s64 get_existing_stripe(struct bch_fs *c, m->sectors == head->blocksize && m->blocks_nonempty < m->nr_blocks - m->nr_redundant) { bch2_stripes_heap_del(c, m, stripe_idx); - spin_unlock(&c->ec_stripes_heap_lock); - return stripe_idx; + ret = stripe_idx; + break; } } - spin_unlock(&c->ec_stripes_heap_lock); - return -1; + return ret; } struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 6e388881ebf9..50ab240d89a8 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -703,14 +703,8 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) if (p.ptr.cached) continue; - if (p.has_ec) { - struct stripe *s = - genradix_ptr(&c->stripes[0], p.ec.idx); - - WARN_ON(!s); - if (s) - replicas += s->nr_redundant; - } + if (p.has_ec) + replicas += p.ec.redundancy; replicas++; @@ -733,16 +727,9 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c, if (ca->mi.state != BCH_MEMBER_STATE_FAILED) durability = max_t(unsigned, durability, ca->mi.durability); - if (p.has_ec) { - struct stripe *s = - genradix_ptr(&c->stripes[0], p.ec.idx); - - if (WARN_ON(!s)) - goto out; + if (p.has_ec) + durability += p.ec.redundancy; - durability += s->nr_redundant; - } -out: return durability; } diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index a867460bc71c..8e6e4cd73886 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -92,11 +92,8 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE; data_opts->rewrite_dev = p.ptr.dev; - if (p.has_ec) { - struct stripe *m = genradix_ptr(&c->stripes[0], p.ec.idx); - - data_opts->nr_replicas += m->nr_redundant; - } + if (p.has_ec) + data_opts->nr_replicas += p.ec.redundancy; return DATA_REWRITE; } @@ -179,12 +176,12 @@ static int bch2_copygc(struct bch_fs *c) bucket_sectors_used(m) >= ca->mi.bucket_size) continue; - WARN_ON(m.stripe && !g->ec_redundancy); + WARN_ON(m.stripe && !g->stripe_redundancy); e = (struct copygc_heap_entry) { .dev = dev_idx, .gen = m.gen, - .replicas = 1 + g->ec_redundancy, + .replicas = 1 + g->stripe_redundancy, .fragmentation = bucket_sectors_used(m) * (1U << 15) / ca->mi.bucket_size, .sectors = bucket_sectors_used(m), |