diff options
Diffstat (limited to 'fs/bcachefs/extents.c')
-rw-r--r-- | fs/bcachefs/extents.c | 290 |
1 files changed, 93 insertions, 197 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 37e3d69bec06..05d5f71a7ca9 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -21,6 +21,7 @@ #include "extents.h" #include "inode.h" #include "journal.h" +#include "rebalance.h" #include "replicas.h" #include "super.h" #include "super-io.h" @@ -88,6 +89,14 @@ static inline bool ptr_better(struct bch_fs *c, u64 l1 = dev_latency(c, p1.ptr.dev); u64 l2 = dev_latency(c, p2.ptr.dev); + /* + * Square the latencies, to bias more in favor of the faster + * device - we never want to stop issuing reads to the slower + * device altogether, so that we can update our latency numbers: + */ + l1 *= l1; + l2 *= l2; + /* Pick at random, biased in favor of the faster device: */ return bch2_rand_range(l1 + l2) > l1; @@ -169,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { int ret = 0; @@ -177,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_validate(c, k, flags); + ret = bch2_bkey_ptrs_validate(c, k, from); fsck_err: return ret; } @@ -189,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, } int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; @@ -203,12 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, c, btree_ptr_v2_min_key_bad, "min_key > key"); - if (flags & BCH_VALIDATE_write) + if ((from.flags & BCH_VALIDATE_write) && + c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) bkey_fsck_err_on(!bp.v->sectors_written, c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_validate(c, k, flags); + ret = bch2_bkey_ptrs_validate(c, k, from); fsck_err: return ret; } @@ -395,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; @@ -1120,6 +1130,57 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr bch2_prt_compression_type(out, crc->compression_type); } +static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, + const struct bch_extent_rebalance *r) +{ + prt_str(out, "rebalance:"); + + prt_printf(out, " replicas=%u", r->data_replicas); + if (r->data_replicas_from_inode) + prt_str(out, " (inode)"); + + prt_str(out, " checksum="); + bch2_prt_csum_opt(out, r->data_checksum); + if (r->data_checksum_from_inode) + prt_str(out, " (inode)"); + + if (r->background_compression || r->background_compression_from_inode) { + prt_str(out, " background_compression="); + bch2_compression_opt_to_text(out, r->background_compression); + + if (r->background_compression_from_inode) + prt_str(out, " (inode)"); + } + + if (r->background_target || r->background_target_from_inode) { + prt_str(out, " background_target="); + if (c) + bch2_target_to_text(out, c, r->background_target); + else + prt_printf(out, "%u", r->background_target); + + if (r->background_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->promote_target || r->promote_target_from_inode) { + prt_str(out, " promote_target="); + if (c) + bch2_target_to_text(out, c, r->promote_target); + else + prt_printf(out, "%u", r->promote_target); + + if (r->promote_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->erasure_code || r->erasure_code_from_inode) { + prt_printf(out, " ec=%u", r->erasure_code); + if (r->erasure_code_from_inode) + prt_str(out, " (inode)"); + } +} + void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { @@ -1155,18 +1216,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, (u64) ec->idx, ec->block); break; } - case BCH_EXTENT_ENTRY_rebalance: { - const struct bch_extent_rebalance *r = &entry->rebalance; - - prt_str(out, "rebalance: target "); - if (c) - bch2_target_to_text(out, c, r->target); - else - prt_printf(out, "%u", r->target); - prt_str(out, " compression "); - bch2_compression_opt_to_text(out, r->compression); + case BCH_EXTENT_ENTRY_rebalance: + bch2_extent_rebalance_to_text(out, c, &entry->rebalance); break; - } + default: prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; @@ -1178,13 +1231,19 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, static int extent_ptr_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, + struct bkey_validate_context from, const struct bch_extent_ptr *ptr, unsigned size_ondisk, bool metadata) { int ret = 0; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr(ptrs, ptr2) + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, + "multiple pointers to same device (%u)", ptr->dev); + /* bad pointers are repaired by check_fix_ptrs(): */ rcu_read_lock(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); @@ -1199,13 +1258,6 @@ static int extent_ptr_validate(struct bch_fs *c, unsigned bucket_size = ca->mi.bucket_size; rcu_read_unlock(); - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, - c, ptr_to_duplicate_device, - "multiple pointers to same device (%u)", ptr->dev); - - bkey_fsck_err_on(bucket >= nbuckets, c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); @@ -1221,7 +1273,7 @@ fsck_err: } int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1248,7 +1300,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); + ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false); if (ret) return ret; @@ -1270,9 +1322,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, - c, ptr_crc_uncompressed_size_too_small, - "checksum offset + key size > uncompressed size"); bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, ptr_crc_csum_type_unknown, "invalid checksum type"); @@ -1280,6 +1329,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, c, ptr_crc_compression_type_unknown, "invalid compression type"); + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, + "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, + "too large encoded extent"); + bkey_fsck_err_on(!crc_is_compressed(crc) && + crc.compressed_size != crc.uncompressed_size, + c, ptr_crc_uncompressed_size_mismatch, + "not compressed but compressed != uncompressed size"); + if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; @@ -1293,12 +1355,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, "redundant crc entry"); crc_since_last_ptr = true; - bkey_fsck_err_on(crc_is_encoded(crc) && - (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), - c, ptr_crc_uncompressed_size_too_big, - "too large encoded extent"); - size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: @@ -1391,166 +1447,6 @@ void bch2_ptr_swab(struct bkey_s k) } } -const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - - bkey_extent_entry_for_each(ptrs, entry) - if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) - return &entry->rebalance; - - return NULL; -} - -unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - unsigned target, unsigned compression) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned rewrite_ptrs = 0; - - if (compression) { - unsigned compression_type = bch2_compression_opt_to_type(compression); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - unsigned i = 0; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || - p.ptr.unwritten) { - rewrite_ptrs = 0; - goto incompressible; - } - - if (!p.ptr.cached && p.crc.compression_type != compression_type) - rewrite_ptrs |= 1U << i; - i++; - } - } -incompressible: - if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { - unsigned i = 0; - - bkey_for_each_ptr(ptrs, ptr) { - if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) - rewrite_ptrs |= 1U << i; - i++; - } - } - - return rewrite_ptrs; -} - -bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) -{ - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - - /* - * If it's an indirect extent, we don't delete the rebalance entry when - * done so that we know what options were applied - check if it still - * needs work done: - */ - if (r && - k.k->type == KEY_TYPE_reflink_v && - !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) - r = NULL; - - return r != NULL; -} - -static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - unsigned target, unsigned compression) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - u64 sectors = 0; - - if (compression) { - unsigned compression_type = bch2_compression_opt_to_type(compression); - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || - p.ptr.unwritten) { - sectors = 0; - goto incompressible; - } - - if (!p.ptr.cached && p.crc.compression_type != compression_type) - sectors += p.crc.compressed_size; - } - } -incompressible: - if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) - sectors += p.crc.compressed_size; - } - - return sectors; -} - -u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) -{ - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - - return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; -} - -int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - struct bch_io_opts *opts) -{ - struct bkey_s k = bkey_i_to_s(_k); - struct bch_extent_rebalance *r; - unsigned target = opts->background_target; - unsigned compression = background_compression(*opts); - bool needs_rebalance; - - if (!bkey_extent_is_direct_data(k.k)) - return 0; - - /* get existing rebalance entry: */ - r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); - if (r) { - if (k.k->type == KEY_TYPE_reflink_v) { - /* - * indirect extents: existing options take precedence, - * so that we don't move extents back and forth if - * they're referenced by different inodes with different - * options: - */ - if (r->target) - target = r->target; - if (r->compression) - compression = r->compression; - } - - r->target = target; - r->compression = compression; - } - - needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); - - if (needs_rebalance && !r) { - union bch_extent_entry *new = bkey_val_end(k); - - new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; - new->rebalance.compression = compression; - new->rebalance.target = target; - new->rebalance.unused = 0; - k.k->u64s += extent_entry_u64s(new); - } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { - /* - * For indirect extents, don't delete the rebalance entry when - * we're finished so that we know we specifically moved it or - * compressed it to its current location/compression type - */ - extent_entry_drop(k, (union bch_extent_entry *) r); - } - - return 0; -} - /* Generic extent code: */ int bch2_cut_front_s(struct bpos where, struct bkey_s k) @@ -1610,7 +1506,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k) case KEY_TYPE_reflink_p: { struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); - le64_add_cpu(&p.v->idx, sub); + SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub); break; } case KEY_TYPE_inline_data: |