From e61dd678601eac53d501dda1eb1bcffec7b11bd6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 18:21:32 -0400 Subject: bcachefs: Fix double free of ca->buckets_nouse Reported-by: Dan Carpenter Fixes: ffcbec6076 ("bcachefs: Kill opts.buckets_nouse") Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0455a1001fec..e7fa2de35014 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); - kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); -- cgit From 7442b5cdf259e2fb112560904b7002ce48d15578 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 20:33:25 -0400 Subject: bcachefs: Don't rely on implicit unsigned -> signed integer conversion implicit integer conversion is a fertile source of bugs, and we really would rather not have the min()/max() macros doing it implicitly. bcachefs appears to be the only place in the kernel where this happens, so let's fix it. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8d2b62c9588e..f13e619b4b21 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -166,8 +166,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, * avoid overflowing LRU_TIME_BITS on a corrupted fs, when * bucket_sectors_dirty is (much) bigger than bucket_size */ - u64 d = min(bch2_bucket_sectors_dirty(a), - ca->mi.bucket_size); + u64 d = min_t(s64, bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } -- cgit From 90b211fa2da3f36939e84b6426988832a62caf4b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 20:35:59 -0400 Subject: bcachefs: Add a comment for bucket helper types We've had bugs in the past with incorrect integer conversions in disk accounting code, which is why bucket helpers now always return s64s; add a comment explaining this. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index f13e619b4b21..96a0444ea78f 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket, bucket_data_type(bucket) != bucket_data_type(ptr); } +/* + * It is my general preference to use unsigned types for unsigned quantities - + * however, these helpers are used in disk accounting calculations run by + * triggers where the output will be negated and added to an s64. unsigned is + * right out even though all these quantities will fit in 32 bits, since it + * won't be sign extended correctly; u64 will negate "correctly", but s64 is the + * simpler option here. + */ static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a) { return a.stripe_sectors + a.dirty_sectors + a.cached_sectors; -- cgit From 02026e8931366158d7395f87afeb0b535210dbee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Aug 2024 22:49:31 -0400 Subject: bcachefs: Add missing bch2_trans_begin() call Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4531c9ab3e12..7ee3b75480df 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_slots); retry: + bch2_trans_begin(trans); rbio->bio.bi_status = 0; k = bch2_btree_iter_peek_slot(&iter); -- cgit From c1e4446247b2a8919649fb9aae2d86f53bf3d1e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Aug 2024 21:02:34 -0400 Subject: bcachefs: Improved allocator debugging for ec chasing down a device removal deadlock with erasure coding Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 6 ++++-- fs/bcachefs/alloc_foreground.h | 2 +- fs/bcachefs/ec.c | 31 ++++++++++++++++++++----------- fs/bcachefs/sysfs.c | 6 +++++- 4 files changed, 30 insertions(+), 15 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 618d2ff0292e..8683fe4fae5b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope prt_newline(out); } -void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) +void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_dev *ca) { struct open_bucket *ob; @@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); - if (ob->valid && !ob->on_partial_list) + if (ob->valid && !ob->on_partial_list && + (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 6da9e7e29026..c78a64ec0553 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); -void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); +void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 9b5b5c9a6c63..8d0cca2f14ed 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2235,6 +2235,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) mutex_unlock(&c->ec_stripes_heap_lock); } +static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c, + struct ec_stripe_new *s) +{ + prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs", + s->idx, s->nr_data, s->nr_parity, + bitmap_weight(s->blocks_allocated, s->nr_data), + atomic_read(&s->ref[STRIPE_REF_io]), + atomic_read(&s->ref[STRIPE_REF_stripe]), + bch2_watermarks[s->h->watermark]); + + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i; + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) + prt_printf(out, " %u", s->blocks[i]); + prt_newline(out); +} + void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) { struct ec_stripe_head *h; @@ -2247,23 +2264,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) bch2_watermarks[h->watermark]); if (h->s) - prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", - h->s->idx, h->s->nr_data, h->s->nr_parity, - bitmap_weight(h->s->blocks_allocated, - h->s->nr_data)); + bch2_new_stripe_to_text(out, c, h->s); } mutex_unlock(&c->ec_stripe_head_lock); prt_printf(out, "in flight:\n"); mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", - s->idx, s->nr_data, s->nr_parity, - atomic_read(&s->ref[STRIPE_REF_io]), - atomic_read(&s->ref[STRIPE_REF_stripe]), - bch2_watermarks[s->h->watermark]); - } + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1c0d1fb20276..f393023a3ae2 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -367,7 +367,7 @@ SHOW(bch2_fs) bch2_stripes_heap_to_text(out, c); if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c); + bch2_open_buckets_to_text(out, c, NULL); if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); @@ -811,6 +811,9 @@ SHOW(bch2_dev) if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c, ca); + return 0; } @@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, + &sysfs_open_buckets, NULL }; -- cgit From 2caca9fb166f82937110368768002511628e6e1f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Aug 2024 23:30:48 -0400 Subject: bcachefs: ec should not allocate from ro devs This fixes a device removal deadlock when using erasure coding. Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 8d0cca2f14ed..84f1cbf6497f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1809,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); BUG_ON(v->nr_redundant != h->s->nr_parity); + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); if (i < h->s->nr_data) -- cgit From 6d496e02b4a70926c3bd4e7ab6249ff262eb3bc0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 18:03:54 -0400 Subject: bcachefs: Add missing path_traverse() to btree_iter_next_node() This fixes a bug exposed by the next path - we pop an assert in path_set_should_be_locked(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 36872207f09b..aa8a049071f4 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1921,6 +1921,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bch2_trans_verify_not_in_restart(trans); bch2_btree_iter_verify(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (ret) + goto err; + + struct btree_path *path = btree_iter_path(trans, iter); /* already at end? */ -- cgit From cecf72798b25fcb00303392407fccf500a746747 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 13:58:57 -0400 Subject: bcachefs: Make allocator stuck timeout configurable, ratelimit messages Limit these messages to once every 2 minutes to avoid spamming logs; with multiple devices the output can be quite significant. Also, up the default timeout to 30 seconds from 10 seconds. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 26 ++++++++++++++++++++++++-- fs/bcachefs/alloc_foreground.h | 7 ++++++- fs/bcachefs/bcachefs.h | 2 ++ fs/bcachefs/bcachefs_format.h | 2 ++ fs/bcachefs/io_misc.c | 6 +----- fs/bcachefs/io_write.c | 5 +---- fs/bcachefs/opts.h | 5 +++++ fs/bcachefs/super-io.c | 4 ++++ 8 files changed, 45 insertions(+), 12 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 8683fe4fae5b..02de5ad2be2c 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1758,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); } -void bch2_print_allocator_stuck(struct bch_fs *c) +static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n"); + prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", + c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); printbuf_indent_add(&buf, 2); @@ -1792,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c) bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } + +static inline unsigned allocator_wait_timeout(struct bch_fs *c) +{ + if (c->allocator_last_stuck && + time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies)) + return 0; + + return c->opts.allocator_stuck_timeout * HZ; +} + +void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + unsigned t = allocator_wait_timeout(c); + + if (t && closure_sync_timeout(cl, t)) { + c->allocator_last_stuck = jiffies; + bch2_print_allocator_stuck(c); + } + + closure_sync(cl); +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index c78a64ec0553..386d231ceca3 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *); void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *); -void bch2_print_allocator_stuck(struct bch_fs *); +void __bch2_wait_on_allocator(struct bch_fs *, struct closure *); +static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + if (cl->closure_get_happened) + __bch2_wait_on_allocator(c, cl); +} #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91361a167dcd..eedf2d6045e7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -893,6 +893,8 @@ struct bch_fs { struct bch_fs_usage_base __percpu *usage; u64 __percpu *online_reserved; + unsigned long allocator_last_stuck; + struct io_clock io_clock[2]; /* JOURNAL SEQ BLACKLIST */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 74a60b1a4ddf..ad893684db52 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -836,6 +836,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 2cf6297756f8..177ed331c00b 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -126,11 +126,7 @@ err_noprint: if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); - - if (closure_sync_timeout(&cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&cl); - } + bch2_wait_on_allocator(c, &cl); } return ret; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d31c8d006d97..1d4761d15002 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1503,10 +1503,7 @@ err: if ((op->flags & BCH_WRITE_SYNC) || (!(op->flags & BCH_WRITE_SUBMITTED) && !(op->flags & BCH_WRITE_IN_WORKER))) { - if (closure_sync_timeout(&op->cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&op->cl); - } + bch2_wait_on_allocator(c, &op->cl); __bch2_write_index(op); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 60b93018501f..cda1725702ea 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -391,6 +391,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ + x(allocator_stuck_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U16_MAX), \ + BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \ + NULL, "Default timeout in seconds for stuck allocator messages")\ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8bc819832790..c8c2ccbdfbb5 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 && + !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb)) + SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30); } for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { -- cgit From 73dc1656f41a42849e43b945fe44d4e3d55eb6c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 16:40:14 -0400 Subject: bcachefs: Use bch2_wait_on_allocator() in btree node alloc path If the allocator gets stuck, we need to know why. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 31ee50184be2..e61f9695771e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } -- cgit From f39bae2e028b841732ca81d8131d27b48a6051ad Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 15:42:23 -0400 Subject: bcachefs: Switch to .get_inode_acl() .set_acl() requires a dentry, and if one isn't passed it marks the VFS inode as not having an ACL. This has been causing inodes with ACLs to have them "disappear" on bcachefs filesystem, depending on which path those inodes get pulled into the cache from. Switching to .get_inode_acl(), like other local filesystems, fixes this. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 11 +++++++---- fs/bcachefs/acl.h | 2 +- fs/bcachefs/fs.c | 8 ++++---- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index a7b425d3c8a0..331a17f3f113 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct posix_acl *acl = NULL; + + if (rcu) + return ERR_PTR(-ECHILD); + + struct btree_trans *trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 27e7eec0f278..fe730a6bf0c1 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t); #ifdef CONFIG_BCACHEFS_POSIX_ACL -struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); +struct posix_acl *bch2_get_acl(struct inode *, int, bool); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3a5f49affa0a..15fc41e63b6c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; -- cgit From 077e47372309dcbe3a150754ea9c6f15cc838d6b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 Aug 2024 23:19:59 -0400 Subject: bcachefs: bch2_accounting_invalid() Implement bch2_accounting_invalid(); check for junk at the end, and replicas accounting entries in particular need to be checked or we'll pop asserts later. Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 65 +++++++++++++++++++++++++++++++++++- fs/bcachefs/disk_accounting_format.h | 9 +++-- fs/bcachefs/replicas.c | 1 - fs/bcachefs/sb-errors_format.h | 6 +++- 4 files changed, 73 insertions(+), 8 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index dcdd59249c23..046ac92b6639 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,11 +114,74 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } +static inline bool is_zero(char *start, char *end) +{ + BUG_ON(start > end); + + for (; start < end; start++) + if (*start) + return false; + return true; +} + +#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags, struct printbuf *err) { - return 0; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_nr_inodes: + end = field_end(acc_k, nr_inodes); + break; + case BCH_DISK_ACCOUNTING_persistent_reserved: + end = field_end(acc_k, persistent_reserved); + break; + case BCH_DISK_ACCOUNTING_replicas: + bkey_fsck_err_on(!acc_k.replicas.nr_devs, + c, err, accounting_key_replicas_nr_devs_0, + "accounting key replicas entry with nr_devs=0"); + + bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || + (acc_k.replicas.nr_required > 1 && + acc_k.replicas.nr_required == acc_k.replicas.nr_devs), + c, err, accounting_key_replicas_nr_required_bad, + "accounting key replicas entry with bad nr_required"); + + for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) + bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], + c, err, accounting_key_replicas_devs_unsorted, + "accounting key replicas entry with unsorted devs"); + + end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + end = field_end(acc_k, dev_data_type); + break; + case BCH_DISK_ACCOUNTING_compression: + end = field_end(acc_k, compression); + break; + case BCH_DISK_ACCOUNTING_snapshot: + end = field_end(acc_k, snapshot); + break; + case BCH_DISK_ACCOUNTING_btree: + end = field_end(acc_k, btree); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + end = field_end(acc_k, rebalance_work); + break; + } + + bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), + c, err, accounting_key_junk_at_end, + "junk at end of accounting key"); +fsck_err: + return ret; } void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k) diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index cba417060b33..848f06cc809d 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -124,10 +124,6 @@ struct bch_dev_data_type { __u8 data_type; }; -struct bch_dev_stripe_buckets { - __u8 dev; -}; - struct bch_acct_compression { __u8 type; }; @@ -140,6 +136,9 @@ struct bch_acct_btree { __u32 id; }; +struct bch_acct_rebalance_work { +}; + struct disk_accounting_pos { union { struct { @@ -149,10 +148,10 @@ struct disk_accounting_pos { struct bch_persistent_reserved persistent_reserved; struct bch_replicas_entry_v1 replicas; struct bch_dev_data_type dev_data_type; - struct bch_dev_stripe_buckets dev_stripe_buckets; struct bch_acct_compression compression; struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; + struct bch_acct_rebalance_work rebalance_work; }; }; struct bpos _pad; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 10c96cb2047a..1223b710755d 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d1b2f2aa397a..d3a498617303 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -287,7 +287,11 @@ enum bch_fsck_flags { x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ - x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ + x(accounting_key_junk_at_end, 277, 0) \ + x(accounting_key_replicas_nr_devs_0, 278, 0) \ + x(accounting_key_replicas_nr_required_bad, 279, 0) \ + x(accounting_key_replicas_devs_unsorted, 280, 0) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit From 1a9e219db15e62760cfcc107ab6df3796d353605 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 Aug 2024 23:44:00 -0400 Subject: bcachefs: improve bch2_dev_usage_to_text() Add a line for capacity Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/buckets.c | 12 ++++++++---- fs/bcachefs/buckets.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 02de5ad2be2c..8563c2d26847 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1740,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - bch2_dev_usage_to_text(out, &stats); + bch2_dev_usage_to_text(out, ca, &stats); prt_newline(out); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2650a0d24663..9f7004e941ce 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +void bch2_dev_usage_to_text(struct printbuf *out, + struct bch_dev *ca, + struct bch_dev_usage *usage) { prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); for (unsigned i = 0; i < BCH_DATA_NR; i++) { bch2_prt_data_type(out, i); prt_printf(out, "\t%llu\r%llu\r%llu\r\n", - usage->d[i].buckets, - usage->d[i].sectors, - usage->d[i].fragmented); + usage->d[i].buckets, + usage->d[i].sectors, + usage->d[i].fragmented); } + + prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); } static int bch2_check_fix_ptr(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2d35eeb24a2d..edbdffd508fc 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { -- cgit From 8a2491db7bea6ad88ec568731eafd583501f1c96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 Aug 2024 00:25:25 -0400 Subject: bcachefs: bcachefs_metadata_version_disk_accounting_v3 bcachefs_metadata_version_disk_accounting_v2 erroneously had padding bytes in disk_accounting_key, which is a problem because we have to guarantee that all unused bytes in disk_accounting_key are zeroed. Fortunately 6.11 isn't out yet, so it's cheap to fix this by spinning a new version. Reported-by: Gabriel de Perthuis Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/disk_accounting_format.h | 8 ++++---- fs/bcachefs/sb-downgrade.c | 27 ++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index ad893684db52..b25f86356728 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,7 +675,8 @@ struct bch_sb_field_ext { x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ - x(disk_accounting_v2, BCH_VERSION(1, 9)) + x(disk_accounting_v2, BCH_VERSION(1, 9)) \ + x(disk_accounting_v3, BCH_VERSION(1, 10)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index 848f06cc809d..a93cf26ff4a9 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -130,11 +130,11 @@ struct bch_acct_compression { struct bch_acct_snapshot { __u32 id; -}; +} __packed; struct bch_acct_btree { __u32 id; -}; +} __packed; struct bch_acct_rebalance_work { }; @@ -152,8 +152,8 @@ struct disk_accounting_pos { struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; struct bch_acct_rebalance_work rebalance_work; - }; - }; + } __packed; + } __packed; struct bpos _pad; }; }; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index dfbbd33c8731..6c4469f53313 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -61,12 +61,37 @@ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ - BCH_FSCK_ERR_accounting_mismatch) + BCH_FSCK_ERR_accounting_mismatch) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_bkey_version_in_future, \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ + BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ + BCH_FSCK_ERR_accounting_key_junk_at_end) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ 0) \ x(disk_accounting_v2, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_fs_usage_hidden_wrong, \ + BCH_FSCK_ERR_fs_usage_btree_wrong, \ + BCH_FSCK_ERR_fs_usage_data_wrong, \ + BCH_FSCK_ERR_fs_usage_cached_wrong, \ + BCH_FSCK_ERR_fs_usage_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ + BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(disk_accounting_v3, \ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ -- cgit From a24e6e7146e361aa0855cf8ee3b2e80b8eb692e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 22:40:39 -0400 Subject: bcachefs: delete faulty fastpath in bch2_btree_path_traverse_cached() bch2_btree_path_traverse_cached() was previously checking if it could just relock the path, which is a common idiom in path traversal. However, it was using btree_node_relock(), not btree_path_relock(); btree_path_relock() only succeeds if the path was in state BTREE_ITER_NEED_RELOCK. If the path was in state BTREE_ITER_NEED_TRAVERSE a full traversal is needed; this led to a null ptr deref in bch2_btree_path_traverse_cached(). And the short circuit check here isn't needed, since it was already done in the main bch2_btree_path_traverse_one(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b..79954490627c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); -- cgit From bd864bc2d90790e00b02b17c75fb951cb4b0bb8b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 23:24:03 -0400 Subject: bcachefs: Fix bch2_trigger_alloc when upgrading from old versions bch2_trigger_alloc was assuming that the new key would always be newly created and thus always an alloc_v4 key, but - not when called from btree_gc. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708..0a8a1bc9a4ac 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -829,7 +829,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & BTREE_TRIGGER_gc)); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); -- cgit From d9e615762bf2eb7459fb0f270525f8b186bce6b7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 04:53:12 -0400 Subject: bcachefs: bch2_accounting_invalid() fixup Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 046ac92b6639..212f53927111 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -154,7 +154,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) - bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], c, err, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); -- cgit From 486d920735325e507d965c2639ba2775b81fd329 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 22:47:55 -0400 Subject: bcachefs: disk accounting: ignore unknown types forward compat fix Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 212f53927111..03a9de6c2e0a 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -528,6 +528,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -760,6 +763,12 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -775,9 +784,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; -- cgit From 48d6cc1b4895ada0781da11a0a483332a236ec14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 01:01:35 -0400 Subject: bcachefs: Add missing downgrade table entry Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 6c4469f53313..9f82d497d9e0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -104,6 +104,7 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { -- cgit From 968feb854a86b59cc4bc72af3105989706ca2c7d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 16:34:28 -0400 Subject: bcachefs: Convert for_each_btree_node() to lockrestart_do() for_each_btree_node() now works similarly to for_each_btree_key(), where the loop body is passed as an argument to be passed to lockrestart_do(). This now calls trans_begin() on every loop iteration - which fixes an SRCU warning in backpointers fsck. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 15 +++++---------- fs/bcachefs/btree_iter.c | 1 + fs/bcachefs/btree_iter.h | 42 +++++++++++++++++++++++++++--------------- fs/bcachefs/debug.c | 38 +++++++++----------------------------- 4 files changed, 42 insertions(+), 54 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a982..9edc4c5f735c 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -763,27 +763,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index aa8a049071f4..2e84d22e17bd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c..dca62375d7d3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -600,23 +600,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5..45aec1afdb0e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); - - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { -- cgit From 7254555c440ff6b136aa97fb3c33fd5e0bb4fb9f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 14:40:09 -0400 Subject: bcachefs: Add hysteresis to waiting on btree key cache flush This helps ensure key cache reclaim isn't contending with threads waiting for the key cache to be helped, and fixes a severe performance bug. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.h | 9 +++++++++ fs/bcachefs/btree_trans_commit.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1..113309f62918 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -20,6 +20,15 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) return nr_dirty > max_dirty; } +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; +} + int bch2_btree_key_cache_journal_flush(struct journal *, struct journal_entry_pin *, u64); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9..f567bfb82850 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -927,7 +927,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); -- cgit From 790666c8ac6427ddaa00f502dc44073c1e039355 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 14:31:17 -0400 Subject: bcachefs: Improve trans_blocked_journal_reclaim tracepoint include information about the state of the btree key cache Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.h | 9 +++++++-- fs/bcachefs/trace.c | 1 + fs/bcachefs/trace.h | 27 +++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index 113309f62918..51d6289b8dee 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,18 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; } static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b4..dfad1d06633d 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb..c62f00322d1e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, -- cgit From 06a8693b890c0cf7d94bf7c6f0e2adf3a3aaa346 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 15:48:18 -0400 Subject: bcachefs: Add a time_stat for blocked on key cache flush Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/btree_trans_commit.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index eedf2d6045e7..0c7086e00d18 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index f567bfb82850..ac0c92683aad 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -973,9 +973,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; -- cgit From c99471024f24b3cbafc02bf5b112ecf34b0dbd40 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 23:29:46 -0400 Subject: bcachefs: Fix warning in __bch2_fsck_err() for trans not passed in Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4a..70ebcca08ba2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1767,6 +1767,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { -- cgit From d97de0d017cde0d442c3d144b4f969f43064cc0f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 21:31:25 -0400 Subject: bcachefs: Make bkey_fsck_err() a wrapper around fsck_err() bkey_fsck_err() was added as an interface that looks like fsck_err(), but previously all it did was ensure that the appropriate error counter was incremented in the superblock. This is a cleanup and bugfix patch that converts it to a wrapper around fsck_err(). This is needed to fix an issue with the upgrade path to disk_accounting_v3, where the "silent fix" error list now includes bkey_fsck errors; fsck_err() handles this in a unified way, and since we need to change printing of bkey fsck errors from the caller to the inner bkey_fsck_err() calls, this ends up being a pretty big change. Als,, rename .invalid() methods to .validate(), for clarity, while we're changing the function signature anyways (to drop the printbuf argument). Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 63 ++++++++-------- fs/bcachefs/alloc_background.h | 26 +++---- fs/bcachefs/backpointers.c | 8 +- fs/bcachefs/backpointers.h | 5 +- fs/bcachefs/bkey.h | 7 +- fs/bcachefs/bkey_methods.c | 109 +++++++++++++-------------- fs/bcachefs/bkey_methods.h | 21 +++--- fs/bcachefs/btree_io.c | 67 +++++------------ fs/bcachefs/btree_node_scan.c | 2 +- fs/bcachefs/btree_trans_commit.c | 72 ++++-------------- fs/bcachefs/btree_update_interior.c | 16 +--- fs/bcachefs/data_update.c | 6 +- fs/bcachefs/dirent.c | 33 ++++----- fs/bcachefs/dirent.h | 5 +- fs/bcachefs/disk_accounting.c | 13 ++-- fs/bcachefs/disk_accounting.h | 5 +- fs/bcachefs/ec.c | 15 ++-- fs/bcachefs/ec.h | 5 +- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 22 ++++++ fs/bcachefs/error.h | 39 ++++++---- fs/bcachefs/extents.c | 144 ++++++++++++++++++------------------ fs/bcachefs/extents.h | 24 +++--- fs/bcachefs/inode.c | 77 +++++++++---------- fs/bcachefs/inode.h | 24 +++--- fs/bcachefs/journal_io.c | 24 ++---- fs/bcachefs/lru.c | 9 +-- fs/bcachefs/lru.h | 5 +- fs/bcachefs/quota.c | 8 +- fs/bcachefs/quota.h | 5 +- fs/bcachefs/reflink.c | 19 ++--- fs/bcachefs/reflink.h | 22 +++--- fs/bcachefs/snapshot.c | 42 +++++------ fs/bcachefs/snapshot.h | 11 ++- fs/bcachefs/subvolume.c | 16 ++-- fs/bcachefs/subvolume.h | 5 +- fs/bcachefs/xattr.c | 21 +++--- fs/bcachefs/xattr.h | 5 +- 38 files changed, 448 insertions(+), 553 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 0a8a1bc9a4ac..fd3a2522bc3e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); for (unsigned i = 0; i < 2; i++) bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", a.v->io_time[i], LRU_TIME_MAX); @@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->cached_sectors || a.v->stripe, - c, err, alloc_key_empty_but_have_data, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, a.v->dirty_sectors, @@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_parity: bkey_fsck_err_on(!a.v->dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_type_str(a.v->data_type)); break; @@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || stripe_sectors || a.v->stripe, - c, err, alloc_key_cached_inconsistency, + c, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 96a0444ea78f..260e7fa83d05 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -240,52 +240,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 9edc4c5f735c..d4da6343efa9 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98..7daecadb764e 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0..e34cb2bf329c 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c..88d8958281e8 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb..3df3dd2723a1 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 70ebcca08ba2..56ea9a77cd4a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1954,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 001107226377..b28c649c6838 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index ac0c92683aad..1a1e9d2036da 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -1064,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1088,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e61f9695771e..b3454d4619e8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1364,18 +1364,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead..6a854c918496 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -250,10 +250,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308e..32bfdf19289a 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a09..8945145865c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 03a9de6c2e0a..f059cbffdf23 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -126,9 +126,8 @@ static inline bool is_zero(char *start, char *end) #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, k.k->p); @@ -144,18 +143,18 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, break; case BCH_DISK_ACCOUNTING_replicas: bkey_fsck_err_on(!acc_k.replicas.nr_devs, - c, err, accounting_key_replicas_nr_devs_0, + c, accounting_key_replicas_nr_devs_0, "accounting key replicas entry with nr_devs=0"); bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || (acc_k.replicas.nr_required > 1 && acc_k.replicas.nr_required == acc_k.replicas.nr_devs), - c, err, accounting_key_replicas_nr_required_bad, + c, accounting_key_replicas_nr_required_bad, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], - c, err, accounting_key_replicas_devs_unsorted, + c, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); @@ -178,7 +177,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, } bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), - c, err, accounting_key_junk_at_end, + c, accounting_key_junk_at_end, "junk at end of accounting key"); fsck_err: return ret; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69..b92f8c2e3054 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 84f1cbf6497f..141a4c63142f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb6249..90962b3c0130 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bf..ab5a7adece10 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820..95afa7bf2020 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad..2f1b86978f36 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include #include +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35f..4419ad3e454e 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1102,14 +1099,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1123,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1158,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1192,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1244,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec..1a6ddee48041 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1..2be6be33afa3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a745099..f1fcb4c58039 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63..7664b68e6a15 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb371..96f2f4f8c397 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11..e6a7d8241bb8 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0a..c32a05e252e2 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218..a62abcc5332a 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525..e59c0abb4772 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67a..51afe11d8ed6 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5..8b18a9b483a4 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e962..eb5ef64221d6 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b55862..dbe834cb349f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb2..a8299ba2cab2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c..f2b4c17a0307 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c85..c188a5ad64ce 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) -- cgit From 5132b99bb62664c02bd6c0dd62ad3fedc75294d8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 02:35:10 -0400 Subject: bcachefs: Kill __bch2_accounting_mem_mod() The next patch will be adding a disk accounting counter type which is not kept in the in-memory eytzinger tree. As prep, fold __bch2_accounting_mem_mod() into bch2_accounting_mem_mod_locked() so that we can check for that counter type and bail out without calling bpos_to_disk_accounting_pos() twice. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 4 +-- fs/bcachefs/disk_accounting.c | 4 +-- fs/bcachefs/disk_accounting.h | 54 ++++++++++++++++++---------------------- 3 files changed, 28 insertions(+), 34 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1a1e9d2036da..a0101d9c5d83 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f059cbffdf23..e57b40623cd9 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -566,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -595,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index b92f8c2e3054..653090667aaa 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -106,41 +106,17 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx; - - EBUG_ON(gc && !acc->gc_running); - - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); - if (ret) - return ret; - } - - struct accounting_mem_entry *e = &acc->k.data[idx]; - - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); - - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) - this_cpu_add(e->v[gc][i], a.v->d[i]); - return 0; -} - /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -161,13 +137,31 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - return __bch2_accounting_mem_mod(c, a, gc); + struct bch_accounting_mem *acc = &c->accounting; + unsigned idx; + + EBUG_ON(gc && !acc->gc_running); + + while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { + int ret = bch2_accounting_mem_insert(c, a, gc); + if (ret) + return ret; + } + + struct accounting_mem_entry *e = &acc->k.data[idx]; + + EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + + for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + this_cpu_add(e->v[gc][i], a.v->d[i]); + return 0; } static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } -- cgit From 58474f76a770bcc79d4b2d7232e4d6650e732b50 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 02:27:36 -0400 Subject: bcachefs: bcachefs_metadata_version_disk_accounting_inum This adds another disk accounting counter to track usage per inode number (any snapshot ID). This will be used for a couple things: - It'll give us a way to tell the user how much space a given file ista consuming in all snapshots; i.e. how much extra space it's consuming due to snapshot versioning. - It counts number of extents and total size of extents (both in btree keyspace sectors and actual disk usage), meaning it gives us average extent size: that is, it'll let us cheaply find fragmented files that should be defragmented. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/buckets.c | 14 ++++++++++++++ fs/bcachefs/disk_accounting.c | 3 +++ fs/bcachefs/disk_accounting.h | 3 +++ fs/bcachefs/disk_accounting_format.h | 8 +++++++- fs/bcachefs/sb-downgrade.c | 5 ++++- 6 files changed, 33 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b25f86356728..c75f2e0f32bb 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -676,7 +676,8 @@ struct bch_sb_field_ext { x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ - x(disk_accounting_v3, BCH_VERSION(1, 10)) + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 9f7004e941ce..b69ef4b3de6e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -810,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans, ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); if (ret) return ret; + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + replicas_sectors, + }; + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); + if (ret) + return ret; } if (bch2_bkey_rebalance_opts(k)) { diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e57b40623cd9..e972e2bca546 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -768,6 +768,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) continue; + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 653090667aaa..f29fd0dd9581 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -116,6 +116,9 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index a93cf26ff4a9..7b6e6c97e6aa 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -136,6 +137,10 @@ struct bch_acct_btree { __u32 id; } __packed; +struct bch_acct_inum { + __u64 inum; +} __packed; + struct bch_acct_rebalance_work { }; @@ -152,6 +157,7 @@ struct disk_accounting_pos { struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; } __packed; } __packed; struct bpos _pad; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 9f82d497d9e0..650a1f77ca40 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -72,7 +72,10 @@ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ -- cgit From c2f6e16a6771eaefba6bb35f6803fe7217822d41 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 15 Aug 2024 13:02:55 -0400 Subject: bcachefs: Increase size of cuckoo hash table on too many rehashes Also, improve the calculation of the new table size, so that it can shrink when needed. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets_waiting_for_journal.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d..f70eb2127d32 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,7 +106,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { @@ -115,7 +115,14 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; -- cgit From 075cabf324c3fd790d6ba39ff9db33a30b954fe2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:31:29 -0400 Subject: bcachefs: Fix forgetting to pass trans to fsck_err() Reported-by: syzbot+e3938cd6d761b78750e6@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index b69ef4b3de6e..ad517ef744e5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -925,7 +925,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", -- cgit From 9482f3b05332a624508a91c2ab2cf3527328a6a4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:41:46 -0400 Subject: bcachefs: avoid overflowing LRU_TIME_BITS for cached data lru Reported-by: syzbot+510b0b28f8e6de64d307@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 260e7fa83d05..fd790b03fbe1 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -150,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ -- cgit From 99c87fe0f584f8d778a323141504d1ba5c89a4a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:44:49 -0400 Subject: bcachefs: fix incorrect i_state usage Reported-by: syzbot+95e40eae71609e40d851@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 15fc41e63b6c..94c392abef65 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new -- cgit From 0e49d3ff12501adaafaf6fdb19699f021d1eda1c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 23:48:58 -0400 Subject: bcachefs: Fix locking in __bch2_trans_mark_dev_sb() We run this in full RW mode now, so we have to guard against the superblock buffer being reallocated. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 5 +---- fs/bcachefs/buckets.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a947..eb3002c4eae7 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ad517ef744e5..be2bbd248631 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -915,7 +915,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -1046,13 +1045,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1063,7 +1067,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; -- cgit From d6d539c9a7ad0655e5ad46b5e869f1b20bce8953 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 19:51:13 -0400 Subject: bcachefs: Reallocate table when we're increasing size Fixes: c2f6e16a6771 ("bcachefs: Increase size of cuckoo hash table on too many rehashes") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets_waiting_for_journal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index f70eb2127d32..f9fb150eda70 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -107,7 +107,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, nr_elements += t->d[i].journal_seq > flushed_seq; new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); - +realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set; @@ -118,6 +118,8 @@ retry_rehash: if (nr_rehashes_this_size == 3) { new_bits++; nr_rehashes_this_size = 0; + kvfree(n); + goto realloc; } nr_rehashes++; -- cgit From d9f49c3106e404776afcf6c5682357f4fe088beb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 16:41:39 -0400 Subject: bcachefs: fix field-spanning write warning attempts to retrofit memory safety onto C are increasingly annoying ------------[ cut here ]------------ memcpy: detected field-spanning write (size 4) of single field "&k.replicas" at fs/bcachefs/replicas.c:454 (size 3) WARNING: CPU: 5 PID: 6525 at fs/bcachefs/replicas.c:454 bch2_replicas_gc2+0x2cb/0x400 [bcachefs] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) Modules linked in: dm_mod tun nf_conntrack_netlink nfnetlink xt_addrtype br_netfilter overlay msr sctp bcachefs lz4hc_compress lz4_compress libcrc32c xor raid6_pq lz4_decompress pps_ldisc pps_core wireguard libchacha20poly1305 chacha_x86_64 poly1305_x86_64 ip6_udp_tunnel udp_tunnel curve25519_x86_64 libcurve25519_generic libchacha sit tunnel4 ip_tunnel af_packet bridge stp llc ip6table_nat ip6table_filter ip6_tables xt_MASQUERADE xt_conntrack iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter ip_tables x_tables tcp_bbr sch_fq_codel efivarfs nls_iso8859_1 nls_cp437 vfat fat cdc_mbim cdc_wdm cdc_ncm cdc_ether usbnet r8152 input_leds joydev mii amdgpu mousedev hid_generic usbhid hid ath10k_pci amd_atl edac_mce_amd ath10k_core kvm_amd ath kvm mac80211 bfq crc32_pclmul crc32c_intel polyval_clmulni polyval_generic sha512_ssse3 sha256_ssse3 sha1_ssse3 snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg i2c_algo_bit drm_exec snd_hda_codec r8169 drm_suballoc_helper aesni_intel gf128mul crypto_simd amdxcp realtek mfd_core tpm_crb drm_buddy snd_hwdep mdio_devres libarc4 cryptd tpm_tis wmi_bmof cfg80211 evdev libphy snd_hda_core tpm_tis_core gpu_sched rapl xhci_pci xhci_hcd snd_pcm drm_display_helper snd_timer tpm sp5100_tco rfkill efi_pstore mpt3sas drm_ttm_helper ahci usbcore libaescfb ccp snd ttm 8250 libahci watchdog soundcore raid_class sha1_generic acpi_cpufreq k10temp 8250_base usb_common scsi_transport_sas i2c_piix4 hwmon video serial_mctrl_gpio serial_base ecdh_generic wmi rtc_cmos backlight ecc gpio_amdpt rng_core gpio_generic button CPU: 5 UID: 0 PID: 6525 Comm: bcachefs Tainted: G W 6.11.0-rc1-ojab-00058-g224bc118aec9 #6 6d5debde398d2a84851f42ab300dae32c2992027 Tainted: [W]=WARN RIP: 0010:bch2_replicas_gc2+0x2cb/0x400 [bcachefs] Code: c7 c2 60 91 d1 c1 48 89 c6 48 c7 c7 98 91 d1 c1 4c 89 14 24 44 89 5c 24 08 48 89 44 24 20 c6 05 fa 68 04 00 01 e8 05 a3 40 e4 <0f> 0b 4c 8b 14 24 44 8b 5c 24 08 48 8b 44 24 20 e9 55 fe ff ff 8b RSP: 0018:ffffb434c9263d60 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9a8efa79cc00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb434c9263de0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000005 R13: ffff9a8efa73c300 R14: ffff9a8d9e880000 R15: ffff9a8d9e8806f8 FS: 0000000000000000(0000) GS:ffff9a9410c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565423373090 CR3: 0000000164e30000 CR4: 00000000003506f0 Call Trace: ? __warn+0x97/0x150 ? bch2_replicas_gc2+0x2cb/0x400 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) ? report_bug+0x196/0x1c0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x17/0x80 ? __wake_up_klogd.part.0+0x4c/0x80 ? asm_exc_invalid_op+0x16/0x20 ? bch2_replicas_gc2+0x2cb/0x400 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) ? bch2_dev_usage_read+0xa0/0xa0 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_dev_usage_read+0xa0/0xa0: discard_in_flight_remove at /home/ojab/src/bcachefs/fs/bcachefs/alloc_background.c:1712 Signed-off-by: Kent Overstreet --- fs/bcachefs/replicas.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 1223b710755d..12b1d28b7eb4 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -451,7 +451,8 @@ retry: .type = BCH_DISK_ACCOUNTING_replicas, }; - memcpy(&k.replicas, e, replicas_entry_bytes(e)); + unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), + "embedded variable length struct"); struct bpos p = disk_accounting_pos_to_bpos(&k); -- cgit From 47cdc7b14417a40af6a5d5909f1d28a5a23fc11d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 17:38:43 -0400 Subject: bcachefs: Fix incorrect gfp flags fixes: 00488 WARNING: CPU: 9 PID: 194 at mm/page_alloc.c:4410 __alloc_pages_noprof+0x1818/0x1888 00488 Modules linked in: 00488 CPU: 9 UID: 0 PID: 194 Comm: kworker/u66:1 Not tainted 6.11.0-rc1-ktest-g18fa10d6495f #2931 00488 Hardware name: linux,dummy-virt (DT) 00488 Workqueue: writeback wb_workfn (flush-bcachefs-2) 00488 pstate: 20001005 (nzCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00488 pc : __alloc_pages_noprof+0x1818/0x1888 00488 lr : __alloc_pages_noprof+0x5f4/0x1888 00488 sp : ffffff80ccd8ed00 00488 x29: ffffff80ccd8ed00 x28: 0000000000000000 x27: dfffffc000000000 00488 x26: 0000000000000010 x25: 0000000000000002 x24: 0000000000000000 00488 x23: 0000000000000000 x22: 1ffffff0199b1dbe x21: ffffff80cc680900 00488 x20: 0000000000000000 x19: ffffff80ccd8eed0 x18: 0000000000000000 00488 x17: ffffff80cc58a010 x16: dfffffc000000000 x15: 1ffffff00474e518 00488 x14: 1ffffff00474e518 x13: 1ffffff00474e518 x12: ffffffb8104701b9 00488 x11: 1ffffff8104701b8 x10: ffffffb8104701b8 x9 : ffffffc08043cde8 00488 x8 : 00000047efb8fe48 x7 : ffffff80ccd8ee20 x6 : 0000000000048000 00488 x5 : 1ffffff810470138 x4 : 0000000000000050 x3 : 1ffffff0199b1d94 00488 x2 : ffffffb0199b1d94 x1 : 0000000000000001 x0 : ffffffc082387448 00488 Call trace: 00488 __alloc_pages_noprof+0x1818/0x1888 00488 new_slab+0x284/0x2f0 00488 ___slab_alloc+0x208/0x8e0 00488 __kmalloc_noprof+0x328/0x340 00488 __bch2_writepage+0x106c/0x1830 00488 write_cache_pages+0xa0/0xe8 due to __GFP_NOFAIL without allowing reclaim Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index cc33d763f722..184d03851676 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -534,7 +534,7 @@ do_io: if (f_sectors > w->tmp_sectors) { kfree(w->tmp); - w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); + w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), GFP_NOFS|__GFP_NOFAIL); w->tmp_sectors = f_sectors; } -- cgit From 2102bdac67b55bf2d1df4ff757bced74e94a5f74 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 21:03:07 -0400 Subject: bcachefs: Extra debug for data move path We don't have sufficient information to debug: https://github.com/koverstreet/bcachefs/issues/726 - print out durability of extent ptrs, when non default - print the number of replicas we need in data_update_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 3 +++ fs/bcachefs/extents.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 6a854c918496..1ca628e93e87 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -475,6 +475,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, bch2_compression_opt_to_text(out, background_compression(*io_opts)); prt_newline(out); + prt_str(out, "opts.replicas:\t"); + prt_u64(out, io_opts->data_replicas); + prt_str(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 4419ad3e454e..9406f82fc255 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1017,6 +1017,8 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc prt_printf(out, "ptr: %u:%llu:%u gen %u", ptr->dev, b, offset, ptr->gen); + if (ca->mi.durability != 1) + prt_printf(out, " d=%u", ca->mi.durability); if (ptr->cached) prt_str(out, " cached"); if (ptr->unwritten) -- cgit From 3f53d050416e88122d53aabbadb1fede998004da Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 12:22:23 -0400 Subject: bcachefs: bch2_data_update_init() cleanup Factor out some helpers - this function has gotten much too big. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 179 ++++++++++++++++++++++++++-------------------- 1 file changed, 101 insertions(+), 78 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1ca628e93e87..5f49f4953b19 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -20,6 +20,76 @@ #include "subvolume.h" #include "trace.h" +static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) + bch2_dev_put(bch2_dev_have_ref(c, ptr->dev)); +} + +static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + if (!bch2_dev_tryget(c, ptr->dev)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); + } + return false; + } + } + return true; +} + +static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } +} + +static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + if (ctxt) { + bool locked; + + move_ctxt_wait_event(ctxt, + (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) || + list_empty(&ctxt->ios)); + + if (!locked) + bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); + } else { + if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + + bucket = PTR_BUCKET_POS(ca, ptr2); + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } + return false; + } + } + } + return true; +} + static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) { if (trace_move_extent_finish_enabled()) { @@ -355,17 +425,11 @@ void bch2_data_update_read_done(struct data_update *m, void bch2_data_update_exit(struct data_update *update) { struct bch_fs *c = update->op.c; - struct bkey_ptrs_c ptrs = - bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k)); - - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - if (c->opts.nocow_enabled) - bch2_bucket_nocow_unlock(&c->nocow_locks, - PTR_BUCKET_POS(ca, ptr), 0); - bch2_dev_put(ca); - } + struct bkey_s_c k = bkey_i_to_s_c(update->k.k); + if (c->opts.nocow_enabled) + bkey_nocow_unlock(c, k); + bkey_put_dev_refs(c, k); bch2_bkey_buf_exit(&update->k, c); bch2_disk_reservation_put(c, &update->op.res); bch2_bio_free_pages_pool(c, &update->op.wbio.bio); @@ -546,7 +610,6 @@ int bch2_data_update_init(struct btree_trans *trans, const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; - unsigned ptrs_locked = 0; int ret = 0; /* @@ -557,6 +620,15 @@ int bch2_data_update_init(struct btree_trans *trans, if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) return -BCH_ERR_data_update_done; + if (!bkey_get_dev_refs(c, k)) + return -BCH_ERR_data_update_done; + + if (c->opts.nocow_enabled && + !bkey_nocow_lock(c, ctxt, k)) { + bkey_put_dev_refs(c, k); + return -BCH_ERR_nocow_lock_blocked; + } + bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; @@ -578,40 +650,24 @@ int bch2_data_update_init(struct btree_trans *trans, m->op.compression_opt = background_compression(io_opts); m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; - bkey_for_each_ptr(ptrs, ptr) { - if (!bch2_dev_tryget(c, ptr->dev)) { - bkey_for_each_ptr(ptrs, ptr2) { - if (ptr2 == ptr) - break; - bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); - } - return -BCH_ERR_data_update_done; - } - } - unsigned durability_have = 0, durability_removing = 0; i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - bool locked; - - rcu_read_lock(); - if (((1U << i) & m->data_opts.rewrite_ptrs)) { - BUG_ON(p.ptr.cached); - - if (crc_is_compressed(p.crc)) - reserve_sectors += k.k->size; - - m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - durability_removing += bch2_extent_ptr_desired_durability(c, &p); - } else if (!p.ptr.cached && - !((1U << i) & m->data_opts.kill_ptrs)) { - bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); - durability_have += bch2_extent_ptr_durability(c, &p); + if (!p.ptr.cached) { + rcu_read_lock(); + if (BIT(i) & m->data_opts.rewrite_ptrs) { + if (crc_is_compressed(p.crc)) + reserve_sectors += k.k->size; + + m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); + durability_removing += bch2_extent_ptr_desired_durability(c, &p); + } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { + bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); + } + rcu_read_unlock(); } - rcu_read_unlock(); /* * op->csum_type is normally initialized from the fs/file's @@ -626,24 +682,6 @@ int bch2_data_update_init(struct btree_trans *trans, if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) m->op.incompressible = true; - if (c->opts.nocow_enabled) { - if (ctxt) { - move_ctxt_wait_event(ctxt, - (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, - bucket, 0)) || - list_empty(&ctxt->ios)); - - if (!locked) - bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); - } else { - if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { - ret = -BCH_ERR_nocow_lock_blocked; - goto err; - } - } - ptrs_locked |= (1U << i); - } - i++; } @@ -664,7 +702,7 @@ int bch2_data_update_init(struct btree_trans *trans, /* if iter == NULL, it's just a promote */ if (iter) ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto done; + goto out; } m->op.nr_replicas = min(durability_removing, durability_required) + @@ -684,8 +722,7 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_data_update_to_text(&buf, m); WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); printbuf_exit(&buf); - ret = -BCH_ERR_data_update_done; - goto done; + goto out; } m->op.nr_replicas_required = m->op.nr_replicas; @@ -696,30 +733,16 @@ int bch2_data_update_init(struct btree_trans *trans, ? 0 : BCH_DISK_RESERVATION_NOFAIL); if (ret) - goto err; + goto out; } if (bkey_extent_is_unwritten(k)) { bch2_update_unwritten_extent(trans, m); - goto done; + goto out; } return 0; -err: - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - if ((1U << i) & ptrs_locked) - bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); - bch2_dev_put(ca); - i++; - } - - bch2_bkey_buf_exit(&m->k, c); - bch2_bio_free_pages_pool(c, &m->op.wbio.bio); - return ret; -done: +out: bch2_data_update_exit(m); return ret ?: -BCH_ERR_data_update_done; } -- cgit From 8cc0e50614520c6c609c6ae32a65d0591b7865a1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 13:13:39 -0400 Subject: bcachefs: Fix "trying to move an extent, but nr_replicas=0" data_update_init() does a bunch of complicated stuff to decide how many replicas to add, since we only want to increase an extent's durability on an explicit rereplicate, but extent pointers may be on devices with different durability settings. There was a corner case when evacuating a device that had been set to durability=0 after data had been written to it, and extents on that device had already been rereplicated - then evacuate only needs to drop pointers on that device, not move them. So the assert for !m->op.nr_replicas was spurious; this was a perfectly legitimate case that needed to be handled. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5f49f4953b19..65176d51b502 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -695,16 +695,6 @@ int bch2_data_update_init(struct btree_trans *trans, * Increasing replication is an explicit operation triggered by * rereplicate, currently, so that users don't get an unexpected -ENOSPC */ - if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) && - !durability_required) { - m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; - m->data_opts.rewrite_ptrs = 0; - /* if iter == NULL, it's just a promote */ - if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto out; - } - m->op.nr_replicas = min(durability_removing, durability_required) + m->data_opts.extra_replicas; @@ -716,17 +706,22 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); - if (!m->op.nr_replicas) { - struct printbuf buf = PRINTBUF; + m->op.nr_replicas_required = m->op.nr_replicas; - bch2_data_update_to_text(&buf, m); - WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); - printbuf_exit(&buf); + /* + * It might turn out that we don't need any new replicas, if the + * replicas or durability settings have been changed since the extent + * was written: + */ + if (!m->op.nr_replicas) { + m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; + m->data_opts.rewrite_ptrs = 0; + /* if iter == NULL, it's just a promote */ + if (iter) + ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); goto out; } - m->op.nr_replicas_required = m->op.nr_replicas; - if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, m->data_opts.extra_replicas -- cgit From 548e7f51679bf0ec3cdc2027d780c5d06a2a7ac6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 13:24:26 -0400 Subject: bcachefs: setting bcachefs_effective.* xattrs is a noop bcachefs_effective.* xattrs show the options inherited from parent directories (as well as explicitly set); this namespace is not for setting bcachefs options. Change the .set() handler to a noop so that if e.g. rsync is copying xattrs it'll do the right thing, and only copy xattrs in the bcachefs.* namespace. We don't want to return an error, because that will cause rsync to bail out or get spammy. Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index f2b4c17a0307..331f944d73dc 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -612,10 +612,20 @@ static int bch2_xattr_bcachefs_get_effective( name, buffer, size, true); } +/* Noop - xattrs in the bcachefs_effective namespace are inherited */ +static int bch2_xattr_bcachefs_set_effective(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *vinode, + const char *name, const void *value, + size_t size, int flags) +{ + return 0; +} + static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { .prefix = "bcachefs_effective.", .get = bch2_xattr_bcachefs_get_effective, - .set = bch2_xattr_bcachefs_set, + .set = bch2_xattr_bcachefs_set_effective, }; #endif /* NO_BCACHEFS_FS */ -- cgit From 49203a6b9d12bfd1a223a67847a631a78f1cd782 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 15:08:12 -0400 Subject: bcachefs: Fix failure to relock in btree_node_get() discovered by new trans->locked asserts Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index f5d85b50b6f2..cc778d7e769e 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -974,6 +974,10 @@ retry: bch2_btree_node_wait_on_read(b); + ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + /* * should_be_locked is not set on this path yet, so we need to * relock it specifically: -- cgit From e150a7e89c4727176d07f5a0a8966fc2af05821c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 20:18:34 -0400 Subject: bcachefs: Fix bch2_trigger_alloc assert On testing on an old mangled filesystem, we missed a case. Fixes: bd864bc2d907 ("bcachefs: Fix bch2_trigger_alloc when upgrading from old versions") Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fd3a2522bc3e..488d0710f7b9 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -829,7 +829,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (likely(new.k->type == KEY_TYPE_alloc_v4)) { new_a = bkey_s_to_alloc_v4(new).v; } else { - BUG_ON(!(flags & BTREE_TRIGGER_gc)); + BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair))); struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); ret = PTR_ERR_OR_ZERO(new_ka); -- cgit From c2a503f3e98e191d86738f5438a3a2b69575c830 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 20:38:49 -0400 Subject: bcachefs: Fix bch2_bucket_gens_init() Comparing the wrong bpos - this was missed because normally bucket_gens_init() runs on brand new filesystems, but this bug caused it to overwrite bucket_gens keys with 0s when upgrading ancient filesystems. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 488d0710f7b9..ac933142aeda 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -556,7 +556,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) struct bpos pos = alloc_gens_pos(iter.pos, &offset); int ret2 = 0; - if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { + if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) { ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret2) -- cgit From b8db1bd8020d5fecb3bf46cd8b954a657c20ba14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 16:13:16 -0400 Subject: bcachefs: fix time_stats_to_text() Fixes: 7423330e30ab ("bcachefs: prt_printf() now respects \r\n\t") Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 138320eaa2ad..1b8554460af4 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -416,7 +416,6 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats printbuf_tabstop_push(out, TABSTOP_SIZE + 2); prt_printf(out, "\tsince mount\r\trecent\r\n"); - prt_printf(out, "recent"); printbuf_tabstops_reset(out); printbuf_tabstop_push(out, out->indent + 20); -- cgit From cecc328240609df17395dfd0ea03cc813d8be36d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 11:25:39 -0400 Subject: bcachefs: fix missing bch2_err_str() Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9138944c5ae6..267c2336b115 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2469,8 +2469,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino : bch2_inode_unpack(inode_k, &inode); if (ret) { /* Should have been caught in dirents pass */ - if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error looking up parent directory: %i", ret); + bch_err_msg(c, ret, "error looking up parent directory"); break; } -- cgit From 1dceae4cc12aa6389d9a8706f0d2a94d1679e79d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 12:10:33 -0400 Subject: bcachefs: unlock_long() before resort in journal replay Fix another SRCU splat - this one pretty harmless. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d89eb43c5ce9..11368dfa96b2 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -322,6 +322,7 @@ int bch2_journal_replay(struct bch_fs *c) } } + bch2_trans_unlock_long(trans); /* * Now, replay any remaining keys in the order in which they appear in * the journal, unpinning those journal entries as we go: -- cgit From 3c5d0b72a8e8c19c960e8fefb7463067e58b6bc4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 15:22:55 -0400 Subject: bcachefs: fix failure to relock in bch2_btree_node_mem_alloc() We weren't always so strict about trans->locked state - but now we are, and new assertions are shaking some bugs out. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 17 ++++++++++++++ fs/bcachefs/btree_cache.h | 2 ++ fs/bcachefs/btree_update_interior.c | 46 ++++++++++++++++++++----------------- 3 files changed, 44 insertions(+), 21 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index cc778d7e769e..063725ecb2b3 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return b; } +void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) +{ + mutex_lock(&c->btree_cache.lock); + list_move(&b->list, &c->btree_cache.freeable); + mutex_unlock(&c->btree_cache.lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + /* Btree in memory cache - hash table */ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) @@ -736,6 +746,13 @@ out: start_time); memalloc_nofs_restore(flags); + + int ret = bch2_trans_relock(trans); + if (unlikely(ret)) { + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); + } + return b; err: mutex_lock(&bc->lock); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index c0eb87a057cc..f82064007127 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -12,6 +12,8 @@ struct btree_iter; void bch2_recalc_btree_reserve(struct bch_fs *); +void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *); + void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index b3454d4619e8..8fd112026e7a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -317,6 +317,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, : 0; int ret; + b = bch2_btree_node_mem_alloc(trans, interior_node); + if (IS_ERR(b)) + return b; + + BUG_ON(b->ob.nr); + mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { struct btree_alloc *a = @@ -325,10 +331,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, obs = a->ob; bkey_copy(&tmp.k, &a->k); mutex_unlock(&c->btree_reserve_cache_lock); - goto mem_alloc; + goto out; } mutex_unlock(&c->btree_reserve_cache_lock); - retry: ret = bch2_alloc_sectors_start_trans(trans, c->opts.metadata_target ?: @@ -341,7 +346,7 @@ retry: c->opts.metadata_replicas_required), watermark, 0, cl, &wp); if (unlikely(ret)) - return ERR_PTR(ret); + goto err; if (wp->sectors_free < btree_sectors(c)) { struct open_bucket *ob; @@ -360,19 +365,16 @@ retry: bch2_open_bucket_get(c, wp, &obs); bch2_alloc_sectors_done(c, wp); -mem_alloc: - b = bch2_btree_node_mem_alloc(trans, interior_node); - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); - - /* we hold cannibalize_lock: */ - BUG_ON(IS_ERR(b)); - BUG_ON(b->ob.nr); - +out: bkey_copy(&b->key, &tmp.k); b->ob = obs; + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); return b; +err: + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); } static struct btree *bch2_btree_node_alloc(struct btree_update *as, @@ -2439,6 +2441,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite } new_hash = bch2_btree_node_mem_alloc(trans, false); + ret = PTR_ERR_OR_ZERO(new_hash); + if (ret) + goto err; } path->intent_ref++; @@ -2446,14 +2451,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite commit_flags, skip_triggers); --path->intent_ref; - if (new_hash) { - mutex_lock(&c->btree_cache.lock); - list_move(&new_hash->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&new_hash->c.lock); - six_unlock_intent(&new_hash->c.lock); - } + if (new_hash) + bch2_btree_node_to_freelist(c, new_hash); +err: closure_sync(&cl); bch2_btree_cache_cannibalize_unlock(trans); return ret; @@ -2522,6 +2522,10 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id b = bch2_btree_node_mem_alloc(trans, false); bch2_btree_cache_cannibalize_unlock(trans); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret; + set_btree_node_fake(b); set_btree_node_need_rewrite(b); b->c.level = level; @@ -2553,7 +2557,7 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, bch2_btree_root_alloc_fake_trans(trans, id, level)); + bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); } static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) -- cgit From 5dbfc4ef72f15508882aff58c307b8425cf037a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 15:04:15 -0400 Subject: bcachefs: fix failure to relock in btree_node_fill() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 063725ecb2b3..e52a06d3418c 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -873,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, bch2_btree_node_read(trans, b, sync); + int ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + if (!sync) return NULL; -- cgit From 6575b8c9877c3dd1f7db1d0d61bd250a0bf18b6d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 19:31:20 -0400 Subject: bcachefs: Fix locking in bch2_ioc_setlabel() Fixes: 7a254053a590 ("bcachefs: support FS_IOC_SETFSLABEL") Reported-by: syzbot+7e9efdfec27fbde0141d@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index aea8132d2c40..99c7fe987c74 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -328,9 +328,8 @@ static int bch2_ioc_setlabel(struct bch_fs *c, mutex_lock(&c->sb_lock); strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); - mutex_unlock(&c->sb_lock); - ret = bch2_write_super(c); + mutex_unlock(&c->sb_lock); mnt_drop_write_file(file); return ret; -- cgit From cab18be6957b6af8cbe3502fd5f6d7b9f02ccceb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 20:49:07 -0400 Subject: bcachefs: Fix replay_now_at() assert Journal replay, in the slowpath where we insert keys in journal order, was inserting keys in the wrong order; keys from early repair come last. Reported-by: syzbot+2c4fcb257ce2b6a29d0e@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 11368dfa96b2..36de1c6fe8c3 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -241,7 +241,13 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - return cmp_int(l->journal_seq, r->journal_seq); + /* + * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last + * + * journal_seq == 0 means that the key comes from early repair, and + * should be inserted last so as to avoid overflowing the journal + */ + return cmp_int(l->journal_seq - 1, r->journal_seq - 1); } int bch2_journal_replay(struct bch_fs *c) -- cgit From bdbdd4759f081ca2d0a5d9e8af21d742ffaf8439 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 21:10:45 -0400 Subject: bcachefs: Fix missing validation in bch2_sb_journal_v2_validate() Reported-by: syzbot+47ecc948aadfb2ab3efc@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_sb.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index db80e506e3ab..62b910f2fb27 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -104,6 +104,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); int ret = -BCH_ERR_invalid_sb_journal; + u64 sum = 0; unsigned nr; unsigned i; struct u64_range *b; @@ -119,6 +120,15 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f for (i = 0; i < nr; i++) { b[i].start = le64_to_cpu(journal->d[i].start); b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr); + + if (b[i].end <= b[i].start) { + prt_printf(err, "journal buckets entry with bad nr: %llu+%llu", + le64_to_cpu(journal->d[i].start), + le64_to_cpu(journal->d[i].nr)); + goto err; + } + + sum += le64_to_cpu(journal->d[i].nr); } sort(b, nr, sizeof(*b), u64_range_cmp, NULL); @@ -148,6 +158,11 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f } } + if (sum > UINT_MAX) { + prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX); + goto err; + } + ret = 0; err: kfree(b); -- cgit From 7f2de6947f92cfa4be8e5eaa1237e962bb8ee65f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:27:45 -0400 Subject: bcachefs: Fix warning in bch2_fs_journal_stop() j->last_empty_seq needs to match j->seq when the journal is empty Reported-by: syzbot+4093905737cf289b6b38@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 649e3a01608a..f5f7db50ca31 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1260,7 +1260,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) } if (!had_entries) - j->last_empty_seq = cur_seq; + j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); -- cgit From 8ed823b19214e403ca485532f48c0e02035021ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:57:56 -0400 Subject: bcachefs: Fix compat issue with old alloc_v4 keys we allow new fields to be added to existing key types, and new versions should treat them as being zeroed; this was not handled in alloc_v4_validate. Reported-by: syzbot+3b2968fa4953885dd66a@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 50 ++++++++++++++++++----------------- fs/bcachefs/alloc_background_format.h | 1 + fs/bcachefs/btree_iter.h | 9 +++++++ 3 files changed, 36 insertions(+), 24 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ac933142aeda..dc97b1f8bc08 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -240,71 +240,73 @@ fsck_err: int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags) { - struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + struct bch_alloc_v4 a; int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k)); + + bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k), c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", - alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); + alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k)); - bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) && + BCH_ALLOC_V4_NR_BACKPOINTERS(&a), c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type, c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); + a.data_type, alloc_data_type(a, a.data_type)); for (unsigned i = 0; i < 2; i++) - bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, + bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX, c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", - a.v->io_time[i], LRU_TIME_MAX); + a.io_time[i], LRU_TIME_MAX); - unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) > + unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) > offsetof(struct bch_alloc_v4, stripe_sectors) - ? a.v->stripe_sectors + ? a.stripe_sectors : 0; - switch (a.v->data_type) { + switch (a.data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: bkey_fsck_err_on(stripe_sectors || - a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe, + a.dirty_sectors || + a.cached_sectors || + a.stripe, c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, - a.v->dirty_sectors, - a.v->cached_sectors, - a.v->stripe); + a.dirty_sectors, + a.cached_sectors, + a.stripe); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - bkey_fsck_err_on(!a.v->dirty_sectors && + bkey_fsck_err_on(!a.dirty_sectors && !stripe_sectors, c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", - bch2_data_type_str(a.v->data_type)); + bch2_data_type_str(a.data_type)); break; case BCH_DATA_cached: - bkey_fsck_err_on(!a.v->cached_sectors || - a.v->dirty_sectors || + bkey_fsck_err_on(!a.cached_sectors || + a.dirty_sectors || stripe_sectors || - a.v->stripe, + a.stripe, c, alloc_key_cached_inconsistency, "data type inconsistency"); - bkey_fsck_err_on(!a.v->io_time[READ] && + bkey_fsck_err_on(!a.io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index 47d9d006502c..f754a2951d8a 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -69,6 +69,7 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; + /* end of fields in original version of alloc_v4 */ __u64 fragmentation_lru; __u32 stripe_sectors; __u32 pad; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index dca62375d7d3..222b7ce8a901 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +#define bkey_val_copy(_dst_v, _src_k) \ +do { \ + unsigned b = min_t(unsigned, sizeof(*_dst_v), \ + bkey_val_bytes(_src_k.k)); \ + memcpy(_dst_v, _src_k.v, b); \ + if (b < sizeof(*_dst_v)) \ + memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ +} while (0) + static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned btree_id, struct bpos pos, unsigned flags, unsigned type, -- cgit From 0b50b7313ef2494926df30ce8e2ce284f1b847fc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 23:21:52 -0400 Subject: bcachefs: Fix refcounting in discard path bch_dev->io_ref does not protect against the filesystem going away; bch_fs->writes does. Thus the filesystem write ref needs to be the last ref we release. Reported-by: syzbot+9e0404b505e604f67e41@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index dc97b1f8bc08..ba46f1c1d78a 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1874,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); - bch2_write_ref_put(c, BCH_WRITE_REF_discard); percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_dev_do_discards(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) return; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) - goto put_ioref; + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + goto put_write_ref; if (queue_work(c->write_ref_wq, &ca->discard_work)) return; - bch2_write_ref_put(c, BCH_WRITE_REF_discard); -put_ioref: percpu_ref_put(&ca->io_ref); +put_write_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_do_discards(struct bch_fs *c) -- cgit From dedb2fe37574857c84e9598b9f5272505dedf7af Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 22 Aug 2024 14:21:58 +0800 Subject: bcachefs: Fix double assignment in check_dirent_to_subvol() ret was assigned twice in check_dirent_to_subvol(). Reported by cocci. Signed-off-by: Yuesong Li Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 267c2336b115..6801c37ee803 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2006,7 +2006,6 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (ret) { bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); ret = -BCH_ERR_fsck_repair_unimplemented; - ret = 0; goto err; } -- cgit From 87313ac1f134d6ee1e7c858da8bdea9147b537a9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 02:13:02 -0400 Subject: bcachefs: clear path->should_be_locked in bch2_btree_key_cache_drop() bch2_btree_key_cache_drop() evicts the key cache entry - it's used when we're doing an update that bypasses the key cache, because for cache coherency reasons a key can't be in the key cache unless it also exists in the btree - i.e. creates have to bypass the cache. After evicting, the path no longer points to a key cache key, and relock() will always fail if should_be_locked is true. Prep for improving path->should_be_locked assertions Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 79954490627c..9b3ec2a3b8ce 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -726,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + path->should_be_locked = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, -- cgit From 3e878fe5a0b139838a65f50a3df3caf3299dbc24 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 03:57:39 -0400 Subject: bcachefs: add missing inode_walker_exit() fix a small leak Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 6801c37ee803..83bd31b44aad 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2215,6 +2215,8 @@ int bch2_check_xattrs(struct bch_fs *c) NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); + + inode_walker_exit(&inode); bch_err_fn(c, ret); return ret; } -- cgit From a592cdf5164d3feb821085df71f63e70e8b8b08c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 16:41:00 -0400 Subject: bcachefs: don't use rht_bucket() in btree_key_cache_scan() rht_bucket() does strange complicated things when a rehash is in progress. Instead, just skip scanning when a rehash is in progress: scanning is going to be more expensive (many more empty slots to cover), and some sort of infinite loop is being observed Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 9b3ec2a3b8ce..fda7998734cb 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -778,6 +778,20 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); + + /* + * Scanning is expensive while a rehash is in progress - most elements + * will be on the new hashtable, if it's in progress + * + * A rehash could still start while we're scanning - that's ok, we'll + * still see most elements. + */ + if (unlikely(tbl->nest)) { + rcu_read_unlock(); + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + return SHRINK_STOP; + } + if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; start = bc->shrink_iter; @@ -785,7 +799,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, do { struct rhash_head *pos, *next; - pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); + pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]); while (!rht_is_a_nulls(pos)) { next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); @@ -866,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) while (atomic_long_read(&bc->nr_keys)) { rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); - if (tbl) + if (tbl) { + if (tbl->nest) { + /* wait for in progress rehash */ + rcu_read_unlock(); + mutex_lock(&bc->table.mutex); + mutex_unlock(&bc->table.mutex); + rcu_read_lock(); + continue; + } for (i = 0; i < tbl->size; i++) - rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { + while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) { + ck = container_of(pos, struct bkey_cached, hash); bkey_cached_evict(bc, ck); list_add(&ck->list, &items); } + } rcu_read_unlock(); } -- cgit From d3204616a67e53fdcad14c7026869330fb382fd4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Aug 2024 17:38:41 -0400 Subject: bcachefs: Fix failure to flush moves before sleeping in copygc This fixes an apparent deadlock - rebalance would get stuck trying to take nocow locks because they weren't being released by copygc. Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index deef4f024d20..d86565bf07c8 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -383,7 +383,7 @@ static int bch2_copygc_thread(void *arg) if (min_member_capacity == U64_MAX) min_member_capacity = 128 * 2048; - bch2_trans_unlock_long(ctxt.trans); + move_buckets_wait(&ctxt, buckets, true); bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), MAX_SCHEDULE_TIMEOUT); } -- cgit From 49aa7830396bce33b00fa7ee734c35de36521138 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Aug 2024 15:35:22 -0400 Subject: bcachefs: Fix rebalance_work accounting rebalance_work was keying off of the presence of rebelance_opts in the extent - but that was incorrect, we keep those around after rebalance for indirect extents since the inode's options are not directly available Fixes: 20ac515a9cc7 ("bcachefs: bch_acct_rebalance_work") Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/buckets.c | 74 ++++++++++++++++++++++++++++--------------- fs/bcachefs/extents.c | 39 +++++++++++++++++++++++ fs/bcachefs/extents.h | 1 + fs/bcachefs/sb-downgrade.c | 8 ++++- 5 files changed, 98 insertions(+), 27 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c75f2e0f32bb..14ce726bf5a3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -677,7 +677,8 @@ struct bch_sb_field_ext { x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ x(disk_accounting_v3, BCH_VERSION(1, 10)) \ - x(disk_accounting_inum, BCH_VERSION(1, 11)) + x(disk_accounting_inum, BCH_VERSION(1, 11)) \ + x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index be2bbd248631..a2274429e7f4 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -699,7 +699,8 @@ err: static int __trigger_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, - enum btree_iter_update_trigger_flags flags) + enum btree_iter_update_trigger_flags flags, + s64 *replicas_sectors) { bool gc = flags & BTREE_TRIGGER_gc; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -708,7 +709,6 @@ static int __trigger_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ? BCH_DATA_btree : BCH_DATA_user; - s64 replicas_sectors = 0; int ret = 0; struct disk_accounting_pos acc_replicas_key = { @@ -739,7 +739,7 @@ static int __trigger_extent(struct btree_trans *trans, if (ret) return ret; } else if (!p.has_ec) { - replicas_sectors += disk_sectors; + *replicas_sectors += disk_sectors; acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev; } else { ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); @@ -777,7 +777,7 @@ static int __trigger_extent(struct btree_trans *trans, } if (acc_replicas_key.replicas.nr_devs) { - ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -787,7 +787,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_snapshot, .snapshot.id = k.k->p.snapshot, }; - ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -807,7 +807,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_btree, .btree.id = btree_id, }; - ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc); if (ret) return ret; } else { @@ -819,22 +819,13 @@ static int __trigger_extent(struct btree_trans *trans, s64 v[3] = { insert ? 1 : -1, insert ? k.k->size : -((s64) k.k->size), - replicas_sectors, + *replicas_sectors, }; ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); if (ret) return ret; } - if (bch2_bkey_rebalance_opts(k)) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_rebalance_work, - }; - ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc); - if (ret) - return ret; - } - return 0; } @@ -843,6 +834,7 @@ int bch2_trigger_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; @@ -858,21 +850,53 @@ int bch2_trigger_extent(struct btree_trans *trans, new_ptrs_bytes)) return 0; - if (flags & BTREE_TRIGGER_transactional) { - struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - - (int) bch2_bkey_needs_rebalance(c, old); + if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { + s64 old_replicas_sectors = 0, new_replicas_sectors = 0; + + if (old.k->type) { + int ret = __trigger_extent(trans, btree, level, old, + flags & ~BTREE_TRIGGER_insert, + &old_replicas_sectors); + if (ret) + return ret; + } + + if (new.k->type) { + int ret = __trigger_extent(trans, btree, level, new.s_c, + flags & ~BTREE_TRIGGER_overwrite, + &new_replicas_sectors); + if (ret) + return ret; + } + + int need_rebalance_delta = 0; + s64 need_rebalance_sectors_delta = 0; + + s64 s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta -= s != 0; + need_rebalance_sectors_delta -= s; - if (mod) { + s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta += s != 0; + need_rebalance_sectors_delta += s; + + if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - new.k->p, mod > 0); + new.k->p, need_rebalance_delta > 0); if (ret) return ret; } - } - if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) - return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags); + if (need_rebalance_sectors_delta) { + struct disk_accounting_pos acc = { + .type = BCH_DISK_ACCOUNTING_rebalance_work, + }; + int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1, + flags & BTREE_TRIGGER_gc); + if (ret) + return ret; + } + } return 0; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 9406f82fc255..e317df3644a1 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1379,6 +1379,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) return r != NULL; } +static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + u64 sectors = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || + p.ptr.unwritten) { + sectors = 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + sectors += p.crc.compressed_size; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) + sectors += p.crc.compressed_size; + } + + return sectors; +} + +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; +} + int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, struct bch_io_opts *opts) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 1a6ddee48041..709dd83183be 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -692,6 +692,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, unsigned, unsigned); bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, struct bch_io_opts *); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 650a1f77ca40..c7e4cdd3f6a5 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -74,6 +74,9 @@ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ BCH_FSCK_ERR_accounting_key_junk_at_end) \ x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(rebalance_work_acct_fix, \ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch) @@ -108,7 +111,10 @@ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ BCH_FSCK_ERR_accounting_replicas_not_marked, \ - BCH_FSCK_ERR_bkey_version_in_future) + BCH_FSCK_ERR_bkey_version_in_future) \ + x(rebalance_work_acct_fix, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) struct upgrade_downgrade_entry { u64 recovery_passes; -- cgit From 66927b89289974dab6d3b3cdd7706d0376034114 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 26 Aug 2024 15:11:38 -0400 Subject: bcachefs: Fix failure to return error in data_update_index_update() This fixes an assertion pop in io_write.c - if we don't return an error we're supposed to have completed all the btree updates. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 65176d51b502..004894ad4147 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -337,6 +337,7 @@ restart_drop_extra_replicas: printbuf_exit(&buf); bch2_fatal_error(c); + ret = -EIO; goto out; } -- cgit From d26935690c03fe8159d42358bed1c56252700cd1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 26 Aug 2024 19:11:00 -0400 Subject: bcachefs: Fix bch2_extents_match() false positive This was caught as a very rare nonce inconsistency, on systems with encryption and replication (and tiering, or some form of rebalance operation running): [Wed Jul 17 13:30:03 2024] about to insert invalid key in data update path [Wed Jul 17 13:30:03 2024] old: u64s 10 type extent 671283510:6392:U32_MAX len 16 ver 106595503: durability: 2 crc: c_size 8 size 16 offset 0 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 3:355968:104 gen 7 ptr: 4:513244:48 gen 6 rebalance: target hdd compression zstd [Wed Jul 17 13:30:03 2024] k: u64s 10 type extent 671283510:6400:U32_MAX len 16 ver 106595508: durability: 2 crc: c_size 8 size 16 offset 0 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 3:355968:112 gen 7 ptr: 4:513244:56 gen 6 rebalance: target hdd compression zstd [Wed Jul 17 13:30:03 2024] new: u64s 14 type extent 671283510:6392:U32_MAX len 8 ver 106595508: durability: 2 crc: c_size 8 size 16 offset 0 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 3:355968:112 gen 7 cached ptr: 4:513244:56 gen 6 cached rebalance: target hdd compression zstd crc: c_size 8 size 16 offset 8 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 1:10860085:32 gen 0 ptr: 0:17285918:408 gen 0 [Wed Jul 17 13:30:03 2024] bcachefs (cca5bc65-fe77-409d-a9fa-465a6e7f4eae): fatal error - emergency read only bch2_extents_match() was reporting true for extents that did not actually point to the same data. bch2_extent_match() iterates over pairs of pointers, looking for pointers that point to the same location on disk (with matching generation numbers). However one or both extents may have been trimmed (or merged) and they might not have the same disk offset: it corrects for this by subtracting the key offset and the checksum entry offset. However, this failed when an extent was immediately partially overwritten, and the new overwrite was allocated the next adjacent disk space. Normally, with compression off, this would never cause a bug, since the new extent would have to be immediately after the old extent for the pointer offsets to match, and the rebalance index update path is not looking for an extent outside the range of the extent it moved. However with compression enabled, extents take up less space on disk than they do in the btree index space - and spuriously matching after partial overwrite is possible. To fix this, add a secondary check, that strictly checks that the regions pointed to on disk overlap. https://github.com/koverstreet/bcachefs/issues/717 Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index e317df3644a1..eb31bda19544 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -929,8 +929,29 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2) bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) if (p1.ptr.dev == p2.ptr.dev && p1.ptr.gen == p2.ptr.gen && + + /* + * This checks that the two pointers point + * to the same region on disk - adjusting + * for the difference in where the extents + * start, since one may have been trimmed: + */ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) == - (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k)) + (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k) && + + /* + * This additionally checks that the + * extents overlap on disk, since the + * previous check may trigger spuriously + * when one extent is immediately partially + * overwritten with another extent (so that + * on disk they are adjacent) and + * compression is in use: + */ + ((p1.ptr.offset >= p2.ptr.offset && + p1.ptr.offset < p2.ptr.offset + p2.crc.compressed_size) || + (p2.ptr.offset >= p1.ptr.offset && + p2.ptr.offset < p1.ptr.offset + p1.crc.compressed_size))) return true; return false; -- cgit From e3e6940940910c2287fe962bdf72015efd4fee81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 31 Aug 2024 17:44:51 -0400 Subject: bcachefs: Revert lockless buffered IO path We had a report of data corruption on nixos when building installer images. https://github.com/NixOS/nixpkgs/pull/321055#issuecomment-2184131334 It seems that writes are being dropped, but only when issued by QEMU, and possibly only in snapshot mode. It's undetermined if it's write calls are being dropped or dirty folios. Further testing, via minimizing the original patch to just the change that skips the inode lock on non appends/truncates, reveals that it really is just not taking the inode lock that causes the corruption: it has nothing to do with the other logic changes for preserving write atomicity in corner cases. It's also kernel config dependent: it doesn't reproduce with the minimal kernel config that ktest uses, but it does reproduce with nixos's distro config. Bisection the kernel config initially pointer the finger at page migration or compaction, but it appears that was erroneous; we haven't yet determined what kernel config option actually triggers it. Sadly it appears this will have to be reverted since we're getting too close to release and my plate is full, but we'd _really_ like to fully debug it. My suspicion is that this patch is exposing a preexisting bug - the inode lock actually covers very little in IO paths, and we have a different lock (the pagecache add lock) that guards against races with truncate here. Fixes: 7e64c86cdc6c ("bcachefs: Buffered write path now can avoid the inode lock") Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 - fs/bcachefs/fs-io-buffered.c | 149 ++++++++++++------------------------------- 2 files changed, 40 insertions(+), 110 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ab5a7adece10..742dcdd3e5d7 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -257,7 +257,6 @@ x(BCH_ERR_nopromote, nopromote_in_flight) \ x(BCH_ERR_nopromote, nopromote_no_writes) \ x(BCH_ERR_nopromote, nopromote_enomem) \ - x(0, need_inode_lock) \ x(0, invalid_snapshot_node) \ x(0, option_needs_open_fs) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 184d03851676..ec8c427bf588 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -802,8 +802,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi) static int __bch2_buffered_write(struct bch_inode_info *inode, struct address_space *mapping, struct iov_iter *iter, - loff_t pos, unsigned len, - bool inode_locked) + loff_t pos, unsigned len) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation res; @@ -827,15 +826,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, BUG_ON(!fs.nr); - /* - * If we're not using the inode lock, we need to lock all the folios for - * atomiticity of writes vs. other writes: - */ - if (!inode_locked && folio_end_pos(darray_last(fs)) < end) { - ret = -BCH_ERR_need_inode_lock; - goto out; - } - f = darray_first(fs); if (pos != folio_pos(f) && !folio_test_uptodate(f)) { ret = bch2_read_single_folio(f, mapping); @@ -932,10 +922,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, end = pos + copied; spin_lock(&inode->v.i_lock); - if (end > inode->v.i_size) { - BUG_ON(!inode_locked); + if (end > inode->v.i_size) i_size_write(&inode->v, end); - } spin_unlock(&inode->v.i_lock); f_pos = pos; @@ -979,68 +967,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct bch_inode_info *inode = file_bch_inode(file); - loff_t pos; - bool inode_locked = false; - ssize_t written = 0, written2 = 0, ret = 0; - - /* - * We don't take the inode lock unless i_size will be changing. Folio - * locks provide exclusion with other writes, and the pagecache add lock - * provides exclusion with truncate and hole punching. - * - * There is one nasty corner case where atomicity would be broken - * without great care: when copying data from userspace to the page - * cache, we do that with faults disable - a page fault would recurse - * back into the filesystem, taking filesystem locks again, and - * deadlock; so it's done with faults disabled, and we fault in the user - * buffer when we aren't holding locks. - * - * If we do part of the write, but we then race and in the userspace - * buffer have been evicted and are no longer resident, then we have to - * drop our folio locks to re-fault them in, breaking write atomicity. - * - * To fix this, we restart the write from the start, if we weren't - * holding the inode lock. - * - * There is another wrinkle after that; if we restart the write from the - * start, and then get an unrecoverable error, we _cannot_ claim to - * userspace that we did not write data we actually did - so we must - * track (written2) the most we ever wrote. - */ - - if ((iocb->ki_flags & IOCB_APPEND) || - (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) { - inode_lock(&inode->v); - inode_locked = true; - } - - ret = generic_write_checks(iocb, iter); - if (ret <= 0) - goto unlock; - - ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0); - if (ret) { - if (!inode_locked) { - inode_lock(&inode->v); - inode_locked = true; - ret = file_remove_privs_flags(file, 0); - } - if (ret) - goto unlock; - } - - ret = file_update_time(file); - if (ret) - goto unlock; - - pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; + ssize_t written = 0; + int ret = 0; bch2_pagecache_add_get(inode); - if (!inode_locked && - (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) - goto get_inode_lock; - do { unsigned offset = pos & (PAGE_SIZE - 1); unsigned bytes = iov_iter_count(iter); @@ -1065,17 +997,12 @@ again: } } - if (unlikely(bytes != iov_iter_count(iter) && !inode_locked)) - goto get_inode_lock; - if (unlikely(fatal_signal_pending(current))) { ret = -EINTR; break; } - ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked); - if (ret == -BCH_ERR_need_inode_lock) - goto get_inode_lock; + ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); if (unlikely(ret < 0)) break; @@ -1096,46 +1023,50 @@ again: } pos += ret; written += ret; - written2 = max(written, written2); - - if (ret != bytes && !inode_locked) - goto get_inode_lock; ret = 0; balance_dirty_pages_ratelimited(mapping); - - if (0) { -get_inode_lock: - bch2_pagecache_add_put(inode); - inode_lock(&inode->v); - inode_locked = true; - bch2_pagecache_add_get(inode); - - iov_iter_revert(iter, written); - pos -= written; - written = 0; - ret = 0; - } } while (iov_iter_count(iter)); - bch2_pagecache_add_put(inode); -unlock: - if (inode_locked) - inode_unlock(&inode->v); - iocb->ki_pos += written; + bch2_pagecache_add_put(inode); - ret = max(written, written2) ?: ret; - if (ret > 0) - ret = generic_write_sync(iocb, ret); - return ret; + return written ? written : ret; } -ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter) +ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) { - ssize_t ret = iocb->ki_flags & IOCB_DIRECT - ? bch2_direct_write(iocb, iter) - : bch2_buffered_write(iocb, iter); + struct file *file = iocb->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + ssize_t ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = bch2_direct_write(iocb, from); + goto out; + } + + inode_lock(&inode->v); + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto unlock; + + ret = file_remove_privs(file); + if (ret) + goto unlock; + + ret = file_update_time(file); + if (ret) + goto unlock; + + ret = bch2_buffered_write(iocb, from); + if (likely(ret > 0)) + iocb->ki_pos += ret; +unlock: + inode_unlock(&inode->v); + if (ret > 0) + ret = generic_write_sync(iocb, ret); +out: return bch2_err_class(ret); } -- cgit From 3d3020c461936009dc58702e267ff67b0076cbf2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 11:47:32 -0400 Subject: bcachefs: Mark more errors as autofix errors that are known to always be safe to fix should be autofix: this should be most errors even at this point, but that will need some thorough review. note that errors are still logged in the superblock, so we'll still know that they happened. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d3a498617303..f0c14702f9e6 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -23,7 +23,7 @@ enum bch_fsck_flags { x(jset_past_bucket_end, 9, 0) \ x(jset_seq_blacklisted, 10, 0) \ x(journal_entries_missing, 11, 0) \ - x(journal_entry_replicas_not_marked, 12, 0) \ + x(journal_entry_replicas_not_marked, 12, FSCK_AUTOFIX) \ x(journal_entry_past_jset_end, 13, 0) \ x(journal_entry_replicas_data_mismatch, 14, 0) \ x(journal_entry_bkey_u64s_0, 15, 0) \ @@ -288,10 +288,10 @@ enum bch_fsck_flags { x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ - x(accounting_key_junk_at_end, 277, 0) \ - x(accounting_key_replicas_nr_devs_0, 278, 0) \ - x(accounting_key_replicas_nr_required_bad, 279, 0) \ - x(accounting_key_replicas_devs_unsorted, 280, 0) \ + x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \ + x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \ + x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \ + x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit From 7f12a963b65872fda1219f065c1cc1b1b9a806e8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 15:53:03 -0400 Subject: bcachefs: fix rebalance accounting Fixes: 49aa7830396b ("bcachefs: Fix rebalance_work accounting") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a2274429e7f4..20219c1e6ddf 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -876,7 +876,7 @@ int bch2_trigger_extent(struct btree_trans *trans, need_rebalance_delta -= s != 0; need_rebalance_sectors_delta -= s; - s = bch2_bkey_sectors_need_rebalance(c, old); + s = bch2_bkey_sectors_need_rebalance(c, new.s_c); need_rebalance_delta += s != 0; need_rebalance_sectors_delta += s; -- cgit From 53f6619554fb1edf8d7599b560d44dbea085c730 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 18:09:18 -0400 Subject: bcachefs: BCH_SB_MEMBER_INVALID Create a sentinal value for "invalid device". This is needed for removing devices that have stripes on them (force removing, without evacuating); we need a sentinal value for the stripe pointers to the device being removed. Signed-off-by: Kent Overstreet --- fs/bcachefs/replicas.c | 3 ++- fs/bcachefs/sb-members.c | 3 ++- fs/bcachefs/sb-members_format.h | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 12b1d28b7eb4..12d4de65ae17 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -82,7 +82,8 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, } for (unsigned i = 0; i < r->nr_devs; i++) - if (!bch2_member_exists(sb, r->devs[i])) { + if (r->devs[i] != BCH_SB_MEMBER_INVALID && + !bch2_member_exists(sb, r->devs[i])) { prt_printf(err, "invalid device %u in entry ", r->devs[i]); goto bad; } diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 39196f2a4197..4b765422dd77 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -11,7 +11,8 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) { - bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); + if (dev != BCH_SB_MEMBER_INVALID) + bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h index e2630548c0f6..d727d2dfda08 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h @@ -8,6 +8,11 @@ */ #define BCH_SB_MEMBERS_MAX 64 +/* + * Sentinal value - indicates a device that does not exist + */ +#define BCH_SB_MEMBER_INVALID 255 + #define BCH_MIN_NR_NBUCKETS (1 << 6) #define BCH_IOPS_MEASUREMENTS() \ -- cgit From 5a6e43af1e5341a11a719270cef9d846b0bbfaa5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Sep 2024 15:48:59 -0400 Subject: bcachefs: Fix ca->io_ref usage ca->io_ref does not protect against the filesystem going way, c->write_ref does. Much like 0b50b7313ef2 bcachefs: Fix refcounting in discard path the other async paths need fixing. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ba46f1c1d78a..dc3a4024aab6 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1968,8 +1968,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work) break; } - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); } static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) @@ -1979,18 +1979,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) if (discard_in_flight_add(ca, bucket, false)) return; - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) return; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) - goto put_ioref; + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + goto put_ref; if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) return; - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); -put_ioref: percpu_ref_put(&ca->io_ref); +put_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); } static int invalidate_one_bucket(struct btree_trans *trans, @@ -2132,26 +2132,26 @@ static void bch2_do_invalidates_work(struct work_struct *work) bch2_trans_iter_exit(trans, &iter); err: bch2_trans_put(trans); - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } void bch2_dev_do_invalidates(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) return; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) - goto put_ioref; + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + goto put_ref; if (queue_work(c->write_ref_wq, &ca->invalidate_work)) return; - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); -put_ioref: percpu_ref_put(&ca->io_ref); +put_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } void bch2_do_invalidates(struct bch_fs *c) -- cgit From ec36573dcd7b160bb9c5e6b20a43d484bc761d2e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Sep 2024 15:43:03 -0400 Subject: bcachefs: Add a cond_resched() to __journal_keys_sort() Without this, we'd potentially sort multiple times without a cond_resched(), leading to hung task warnings on larger systems. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 74933490aaba..c1657182c275 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -530,6 +530,8 @@ static void __journal_keys_sort(struct journal_keys *keys) { sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL); + cond_resched(); + struct journal_key *dst = keys->data; darray_for_each(*keys, src) { -- cgit From df88febc2065ae64779f295df0a5d4f52e5591e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Sep 2024 17:49:20 -0400 Subject: bcachefs: Simplify bch2_bkey_drop_ptrs() bch2_bkey_drop_ptrs() had a some complicated machinery for avoiding O(n^2) when dropping multiple pointers - but when n is only going to be ~4, it's not worth it. Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 21 +++++---------------- fs/bcachefs/extents.h | 23 +++++++++-------------- 2 files changed, 14 insertions(+), 30 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index eb31bda19544..1a0c714c13e2 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -781,12 +781,10 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, /* * Returns pointer to the next entry after the one being dropped: */ -union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k, - struct bch_extent_ptr *ptr) +void bch2_bkey_drop_ptr_noerror(struct bkey_s k, struct bch_extent_ptr *ptr) { struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *entry = to_entry(ptr), *next; - union bch_extent_entry *ret = entry; bool drop_crc = true; EBUG_ON(ptr < &ptrs.start->ptr || @@ -811,21 +809,16 @@ union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k, break; if ((extent_entry_is_crc(entry) && drop_crc) || - extent_entry_is_stripe_ptr(entry)) { - ret = (void *) ret - extent_entry_bytes(entry); + extent_entry_is_stripe_ptr(entry)) extent_entry_drop(k, entry); - } } - - return ret; } -union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, - struct bch_extent_ptr *ptr) +void bch2_bkey_drop_ptr(struct bkey_s k, struct bch_extent_ptr *ptr) { bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr; - union bch_extent_entry *ret = - bch2_bkey_drop_ptr_noerror(k, ptr); + + bch2_bkey_drop_ptr_noerror(k, ptr); /* * If we deleted all the dirty pointers and there's still cached @@ -837,14 +830,10 @@ union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, !bch2_bkey_dirty_devs(k.s_c).nr) { k.k->type = KEY_TYPE_error; set_bkey_val_u64s(k.k, 0); - ret = NULL; } else if (!bch2_bkey_nr_ptrs(k.s_c)) { k.k->type = KEY_TYPE_deleted; set_bkey_val_u64s(k.k, 0); - ret = NULL; } - - return ret; } void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 709dd83183be..42a7c6d820a0 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -649,26 +649,21 @@ static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr void bch2_extent_ptr_decoded_append(struct bkey_i *, struct extent_ptr_decoded *); -union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s, - struct bch_extent_ptr *); -union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, - struct bch_extent_ptr *); +void bch2_bkey_drop_ptr_noerror(struct bkey_s, struct bch_extent_ptr *); +void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); #define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ do { \ - struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ + __label__ _again; \ + struct bkey_ptrs _ptrs; \ +_again: \ + _ptrs = bch2_bkey_ptrs(_k); \ \ - struct bch_extent_ptr *_ptr = &_ptrs.start->ptr; \ - \ - while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ + bkey_for_each_ptr(_ptrs, _ptr) \ if (_cond) { \ - _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ - _ptrs = bch2_bkey_ptrs(_k); \ - continue; \ + bch2_bkey_drop_ptr(_k, _ptr); \ + goto _again; \ } \ - \ - (_ptr)++; \ - } \ } while (0) bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, -- cgit From 52df04f03994217aa5f98eb83255e85ee60b5e29 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Sep 2024 17:50:20 -0400 Subject: bcachefs: More BCH_SB_MEMBER_INVALID support Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 15 ++++++++------- fs/bcachefs/ec.h | 4 +++- fs/bcachefs/extents.c | 5 +++++ fs/bcachefs/replicas.c | 2 +- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 20219c1e6ddf..721bbe1dffc1 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -100,12 +100,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); if (!ca) { - if (fsck_err(trans, ptr_to_invalid_device, - "pointer to missing device %u\n" - "while marking %s", - p.ptr.dev, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, + trans, ptr_to_invalid_device, + "pointer to missing device %u\n" + "while marking %s", + p.ptr.dev, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) *do_update = true; return 0; } @@ -562,7 +563,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); if (unlikely(!ca)) { - if (insert) + if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) ret = -EIO; goto err; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 90962b3c0130..9baf3411a8f9 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -97,7 +97,9 @@ static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe const struct bch_extent_ptr *data_ptr, unsigned sectors) { - return data_ptr->dev == stripe_ptr->dev && + return (data_ptr->dev == stripe_ptr->dev || + data_ptr->dev == BCH_SB_MEMBER_INVALID || + stripe_ptr->dev == BCH_SB_MEMBER_INVALID) && data_ptr->gen == stripe_ptr->gen && data_ptr->offset >= stripe_ptr->offset && data_ptr->offset < stripe_ptr->offset + sectors; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 1a0c714c13e2..324303bf4353 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -787,6 +787,11 @@ void bch2_bkey_drop_ptr_noerror(struct bkey_s k, struct bch_extent_ptr *ptr) union bch_extent_entry *entry = to_entry(ptr), *next; bool drop_crc = true; + if (k.k->type == KEY_TYPE_stripe) { + ptr->dev = BCH_SB_MEMBER_INVALID; + return; + } + EBUG_ON(ptr < &ptrs.start->ptr || ptr >= &ptrs.end->ptr); EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 12d4de65ae17..1f34c92a6d11 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -796,7 +796,7 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, nr_online += test_bit(e->devs[i], devs.d); struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]); - nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed; + nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed; } rcu_read_unlock(); -- cgit From 2c377d8a71db32d4125d30b3641f2bc51c6850ca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Sep 2024 21:18:57 -0400 Subject: bcachefs: fix btree_key_cache sysfs knob Signed-off-by: Kent Overstreet --- fs/bcachefs/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index f393023a3ae2..33f2a64c14c9 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -461,7 +461,7 @@ STORE(bch2_fs) sc.gfp_mask = GFP_KERNEL; sc.nr_to_scan = strtoul_or_return(buf); - c->btree_key_cache.shrink->scan_objects(c->btree_cache.shrink, &sc); + c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc); } if (attr == &sysfs_trigger_gc) -- cgit From 16005147cca41a0f67b5def2a4656286f8c0db4a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Sep 2024 01:06:57 -0400 Subject: bcachefs: Don't delete open files in online fsck If a file is unlinked but still open, we don't want online fsck to delete it - or fun inconsistencies will happen. https://github.com/koverstreet/bcachefs/issues/727 Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 8 ++++++++ fs/bcachefs/fs.h | 7 +++++++ fs/bcachefs/fsck.c | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'fs/bcachefs') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 94c392abef65..257f07656e5f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -177,6 +177,14 @@ static unsigned bch2_inode_hash(subvol_inum inum) return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL); } +struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +{ + return to_bch_ei(ilookup5_nowait(c->vfs_sb, + bch2_inode_hash(inum), + bch2_iget5_test, + &inum)); +} + static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode) { subvol_inum inum = inode_inum(inode); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index c3af7225ff69..990ec43e0365 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -56,6 +56,8 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode) }; } +struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum); + /* * Set if we've gotten a btree error for this inode, and thus the vfs inode and * btree inode may be inconsistent: @@ -194,6 +196,11 @@ int bch2_vfs_init(void); #define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); }) +static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +{ + return NULL; +} + static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) {} static inline void bch2_vfs_exit(void) {} diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 83bd31b44aad..9b3470a97546 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -8,6 +8,7 @@ #include "darray.h" #include "dirent.h" #include "error.h" +#include "fs.h" #include "fs-common.h" #include "fsck.h" #include "inode.h" @@ -962,6 +963,22 @@ fsck_err: return ret; } +static bool bch2_inode_open(struct bch_fs *c, struct bpos p) +{ + subvol_inum inum = { + .subvol = snapshot_t(c, p.snapshot)->subvol, + .inum = p.offset, + }; + + /* snapshot tree corruption, can't safely delete */ + if (!inum.subvol) { + bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot); + return true; + } + + return __bch2_inode_hash_find(c, inum) != NULL; +} + static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -1040,6 +1057,7 @@ static int check_inode(struct btree_trans *trans, } if (u.bi_flags & BCH_INODE_unlinked && + !bch2_inode_open(c, k.k->p) && (!c->sb.clean || fsck_err(trans, inode_unlinked_but_clean, "filesystem marked clean, but inode %llu unlinked", -- cgit