diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2024-05-08 00:29:24 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2024-05-08 14:56:09 -0400 |
commit | 5dfd3746b6c486db18bc75de89c7abce41c7826c (patch) | |
tree | c43f0029ada7870f3f8be29579c382a481464174 /fs/bcachefs/bkey_sort.c | |
parent | 5ad1f33c29c311787943a92c56903da74f19fd5d (diff) |
bcachefs: Fix needs_whiteout BUG_ON() in bkey_sort()
Btree nodes are log structured; thus, we need to emit whiteouts when
we're deleting a key that's been written out to disk.
k->needs_whiteout tracks whether a key will need a whiteout when it's
deleted, and this requires some careful handling; e.g. the key we're
deleting may not have been written out to disk, but it may have
overwritten a key that was - thus we need to carry this flag around on
overwrites.
Invariants:
There may be multiple key for the same position in a given node (because
of overwrites), but only one of them will be a live (non deleted) key,
and only one key for a given position will have the needs_whiteout flag
set.
Additionally, we don't want to carry around whiteouts that need to be
written in the main searchable part of a btree node - btree_iter_peek()
will have to skip past them, and this can lead to an O(n^2) issues when
doing sequential deletions (e.g. inode rm/truncate). So there's a
separate region in the btree node buffer for unwritten whiteouts; these
are merge sorted with the rest of the keys we're writing in the btree
node write path.
The unwritten whiteouts was a later optimization that bch2_sort_keys()
didn't take into account; the unwritten whiteouts area means that we
never have deleted keys with needs_whiteout set in the main searchable
part of a btree node.
That means we can simplify and optimize some sort paths, and eliminate
an assertion that syzbot found:
- Unless we're in the btree node write path, it's always ok to drop
whiteouts when sorting
- When sorting for a btree node write, we drop the whiteout if it's not
from the unwritten whiteouts area, or if it's overwritten by a real
key at the same position.
This completely eliminates some tricky logic for propagating the
needs_whiteout flag: syzbot was able to hit the assertion that checked
that there shouldn't be more than one key at the same pos with
needs_whiteout set, likely due to a combination of flipping on
needs_whiteout on all written keys (they need whiteouts if overwritten),
combined with not always dropping unneeded whiteouts, and the tricky
logic in the sort path for preserving needs_whiteout that wasn't really
needed.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/bkey_sort.c')
-rw-r--r-- | fs/bcachefs/bkey_sort.c | 79 |
1 files changed, 46 insertions, 33 deletions
diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index bcca9e76a0b4..4536eb50fc40 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -6,9 +6,9 @@ #include "bset.h" #include "extents.h" -typedef int (*sort_cmp_fn)(struct btree *, - struct bkey_packed *, - struct bkey_packed *); +typedef int (*sort_cmp_fn)(const struct btree *, + const struct bkey_packed *, + const struct bkey_packed *); static inline bool sort_iter_end(struct sort_iter *iter) { @@ -70,9 +70,9 @@ static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, /* * If keys compare equal, compare by pointer order: */ -static inline int key_sort_fix_overlapping_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) +static inline int key_sort_fix_overlapping_cmp(const struct btree *b, + const struct bkey_packed *l, + const struct bkey_packed *r) { return bch2_bkey_cmp_packed(b, l, r) ?: cmp_int((unsigned long) l, (unsigned long) r); @@ -154,46 +154,59 @@ bch2_sort_repack(struct bset *dst, struct btree *src, return nr; } -static inline int sort_keys_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) +static inline int keep_unwritten_whiteouts_cmp(const struct btree *b, + const struct bkey_packed *l, + const struct bkey_packed *r) { return bch2_bkey_cmp_packed_inlined(b, l, r) ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: - (int) l->needs_whiteout - (int) r->needs_whiteout; + (long) l - (long) r; } -unsigned bch2_sort_keys(struct bkey_packed *dst, - struct sort_iter *iter, - bool filter_whiteouts) +#include "btree_update_interior.h" + +/* + * For sorting in the btree node write path: whiteouts not in the unwritten + * whiteouts area are dropped, whiteouts in the unwritten whiteouts area are + * dropped if overwritten by real keys: + */ +unsigned bch2_sort_keys_keep_unwritten_whiteouts(struct bkey_packed *dst, struct sort_iter *iter) { - const struct bkey_format *f = &iter->b->format; struct bkey_packed *in, *next, *out = dst; - sort_iter_sort(iter, sort_keys_cmp); + sort_iter_sort(iter, keep_unwritten_whiteouts_cmp); - while ((in = sort_iter_next(iter, sort_keys_cmp))) { - bool needs_whiteout = false; + while ((in = sort_iter_next(iter, keep_unwritten_whiteouts_cmp))) { + if (bkey_deleted(in) && in < unwritten_whiteouts_start(iter->b)) + continue; - if (bkey_deleted(in) && - (filter_whiteouts || !in->needs_whiteout)) + if ((next = sort_iter_peek(iter)) && + !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) continue; - while ((next = sort_iter_peek(iter)) && - !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) { - BUG_ON(in->needs_whiteout && - next->needs_whiteout); - needs_whiteout |= in->needs_whiteout; - in = sort_iter_next(iter, sort_keys_cmp); - } + bkey_p_copy(out, in); + out = bkey_p_next(out); + } - if (bkey_deleted(in)) { - memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in)); - set_bkeyp_val_u64s(f, out, 0); - } else { - bkey_p_copy(out, in); - } - out->needs_whiteout |= needs_whiteout; + return (u64 *) out - (u64 *) dst; +} + +/* + * Main sort routine for compacting a btree node in memory: we always drop + * whiteouts because any whiteouts that need to be written are in the unwritten + * whiteouts area: + */ +unsigned bch2_sort_keys(struct bkey_packed *dst, struct sort_iter *iter) +{ + struct bkey_packed *in, *out = dst; + + sort_iter_sort(iter, bch2_bkey_cmp_packed_inlined); + + while ((in = sort_iter_next(iter, bch2_bkey_cmp_packed_inlined))) { + if (bkey_deleted(in)) + continue; + + bkey_p_copy(out, in); out = bkey_p_next(out); } |