Diffstat (limited to 'fs/bcachefs/extents.c')
-rw-r--r--  fs/bcachefs/extents.c  290
1 file changed, 93 insertions, 197 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 37e3d69bec06..05d5f71a7ca9 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -21,6 +21,7 @@
#include "extents.h"
#include "inode.h"
#include "journal.h"
+#include "rebalance.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
@@ -88,6 +89,14 @@ static inline bool ptr_better(struct bch_fs *c,
u64 l1 = dev_latency(c, p1.ptr.dev);
u64 l2 = dev_latency(c, p2.ptr.dev);
+ /*
+ * Square the latencies, to bias more in favor of the faster
+ * device - we never want to stop issuing reads to the slower
+ * device altogether, so that we can update our latency numbers:
+ */
+ l1 *= l1;
+ l2 *= l2;
+
/* Pick at random, biased in favor of the faster device: */
return bch2_rand_range(l1 + l2) > l1;
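With the latencies squared, a device with twice its peer's latency now serves about 1 read in 5 instead of 1 in 3, while still seeing enough reads to keep its latency numbers fresh. A minimal userspace sketch of the selection, assuming only that bch2_rand_range(max) returns a uniform value in [0, max); rand() stands in for it here, modulo bias ignored:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Stand-in for bch2_rand_range(): uniform value in [0, max).
 * Assumes max > 0, i.e. at least one latency is nonzero. */
static uint64_t rand_range(uint64_t max)
{
	return (uint64_t) rand() % max;
}

/* True if the read should go to p1 (latency l1). */
static bool pick_p1(uint64_t l1, uint64_t l2)
{
	l1 *= l1;	/* squaring biases harder toward the */
	l2 *= l2;	/* lower-latency device */
	return rand_range(l1 + l2) > l1;	/* true with ~l2/(l1+l2) */
}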
@@ -169,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
/* KEY_TYPE_btree_ptr: */
int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
@@ -177,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k,
c, btree_ptr_val_too_big,
"value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
@@ -189,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
int ret = 0;
@@ -203,12 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k,
c, btree_ptr_v2_min_key_bad,
"min_key > key");
- if (flags & BCH_VALIDATE_write)
+ if ((from.flags & BCH_VALIDATE_write) &&
+ c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written)
bkey_fsck_err_on(!bp.v->sectors_written,
c, btree_ptr_v2_written_0,
"sectors_written == 0");
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
@@ -395,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
/* KEY_TYPE_reservation: */
int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
int ret = 0;
@@ -1120,6 +1130,57 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr
bch2_prt_compression_type(out, crc->compression_type);
}
+static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
+ const struct bch_extent_rebalance *r)
+{
+ prt_str(out, "rebalance:");
+
+ prt_printf(out, " replicas=%u", r->data_replicas);
+ if (r->data_replicas_from_inode)
+ prt_str(out, " (inode)");
+
+ prt_str(out, " checksum=");
+ bch2_prt_csum_opt(out, r->data_checksum);
+ if (r->data_checksum_from_inode)
+ prt_str(out, " (inode)");
+
+ if (r->background_compression || r->background_compression_from_inode) {
+ prt_str(out, " background_compression=");
+ bch2_compression_opt_to_text(out, r->background_compression);
+
+ if (r->background_compression_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->background_target || r->background_target_from_inode) {
+ prt_str(out, " background_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->background_target);
+ else
+ prt_printf(out, "%u", r->background_target);
+
+ if (r->background_target_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->promote_target || r->promote_target_from_inode) {
+ prt_str(out, " promote_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->promote_target);
+ else
+ prt_printf(out, "%u", r->promote_target);
+
+ if (r->promote_target_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->erasure_code || r->erasure_code_from_inode) {
+ prt_printf(out, " ec=%u", r->erasure_code);
+ if (r->erasure_code_from_inode)
+ prt_str(out, " (inode)");
+ }
+}
+
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
@@ -1155,18 +1216,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
(u64) ec->idx, ec->block);
break;
}
- case BCH_EXTENT_ENTRY_rebalance: {
- const struct bch_extent_rebalance *r = &entry->rebalance;
-
- prt_str(out, "rebalance: target ");
- if (c)
- bch2_target_to_text(out, c, r->target);
- else
- prt_printf(out, "%u", r->target);
- prt_str(out, " compression ");
- bch2_compression_opt_to_text(out, r->compression);
+ case BCH_EXTENT_ENTRY_rebalance:
+ bch2_extent_rebalance_to_text(out, c, &entry->rebalance);
break;
- }
+
default:
prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
return;
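With the helper above replacing the old two-field printout, a rebalance entry now renders along these lines (values hypothetical; each field appears only when set, and "(inode)" marks an option inherited from the inode rather than set per-extent):

rebalance: replicas=2 checksum=crc32c background_compression=lz4 (inode) background_target=hdd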
@@ -1178,13 +1231,19 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
static int extent_ptr_validate(struct bch_fs *c,
struct bkey_s_c k,
- enum bch_validate_flags flags,
+ struct bkey_validate_context from,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk,
bool metadata)
{
int ret = 0;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr2)
+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
+ c, ptr_to_duplicate_device,
+ "multiple pointers to same device (%u)", ptr->dev);
+
/* bad pointers are repaired by check_fix_ptrs(): */
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
@@ -1199,13 +1258,6 @@ static int extent_ptr_validate(struct bch_fs *c,
unsigned bucket_size = ca->mi.bucket_size;
rcu_read_unlock();
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- bkey_for_each_ptr(ptrs, ptr2)
- bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
- c, ptr_to_duplicate_device,
- "multiple pointers to same device (%u)", ptr->dev);
-
-
bkey_fsck_err_on(bucket >= nbuckets,
c, ptr_after_last_bucket,
"pointer past last bucket (%llu > %llu)", bucket, nbuckets);
@@ -1221,7 +1273,7 @@ fsck_err:
}
int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1248,7 +1300,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
- ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false);
+ ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false);
if (ret)
return ret;
@@ -1270,9 +1322,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size,
- c, ptr_crc_uncompressed_size_too_small,
- "checksum offset + key size > uncompressed size");
bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type),
c, ptr_crc_csum_type_unknown,
"invalid checksum type");
@@ -1280,6 +1329,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
c, ptr_crc_compression_type_unknown,
"invalid compression type");
+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size,
+ c, ptr_crc_uncompressed_size_too_small,
+ "checksum offset + key size > uncompressed size");
+ bkey_fsck_err_on(crc_is_encoded(crc) &&
+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
+ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)),
+ c, ptr_crc_uncompressed_size_too_big,
+ "too large encoded extent");
+ bkey_fsck_err_on(!crc_is_compressed(crc) &&
+ crc.compressed_size != crc.uncompressed_size,
+ c, ptr_crc_uncompressed_size_mismatch,
+ "not compressed but compressed != uncompressed size");
+
if (bch2_csum_type_is_encryption(crc.csum_type)) {
if (nonce == UINT_MAX)
nonce = crc.offset + crc.nonce;
@@ -1293,12 +1355,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
"redundant crc entry");
crc_since_last_ptr = true;
- bkey_fsck_err_on(crc_is_encoded(crc) &&
- (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
- (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)),
- c, ptr_crc_uncompressed_size_too_big,
- "too large encoded extent");
-
size_ondisk = crc.compressed_size;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
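Taken together, these two hunks regroup the crc size checks: the live-size bound moves down next to the new ones, the encoded-extent cap moves up from its old spot below, and a compressed/uncompressed size-mismatch check is added. Restated as a self-contained predicate (field names mirror struct bch_extent_crc_unpacked; sizes are in 512-byte sectors, hence the encoded_extent_max >> 9 above):

#include <stdbool.h>
#include <stdint.h>

/* Minimal stand-in for the unpacked crc; all sizes in sectors. */
struct crc {
	uint32_t compressed_size;
	uint32_t uncompressed_size;
	uint32_t offset;	/* start of live data in the extent */
	uint32_t live_size;	/* sectors the key actually references */
	bool	 compressed;
	bool	 encrypted;	/* compressed || encrypted == "encoded" */
};

static bool crc_sizes_ok(struct crc c, uint32_t encoded_max_sectors,
			 bool write_or_commit)
{
	if (c.offset + c.live_size > c.uncompressed_size)
		return false;	/* live region past end of extent */
	if ((c.compressed || c.encrypted) &&
	    c.uncompressed_size > encoded_max_sectors &&
	    write_or_commit)	/* cap only enforced on write/commit */
		return false;
	if (!c.compressed && c.compressed_size != c.uncompressed_size)
		return false;	/* sizes must match when uncompressed */
	return true;
}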
@@ -1391,166 +1447,6 @@ void bch2_ptr_swab(struct bkey_s k)
}
}
-const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
-{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
-
- bkey_extent_entry_for_each(ptrs, entry)
- if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
- return &entry->rebalance;
-
- return NULL;
-}
-
-unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
- unsigned target, unsigned compression)
-{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- unsigned rewrite_ptrs = 0;
-
- if (compression) {
- unsigned compression_type = bch2_compression_opt_to_type(compression);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- unsigned i = 0;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
- p.ptr.unwritten) {
- rewrite_ptrs = 0;
- goto incompressible;
- }
-
- if (!p.ptr.cached && p.crc.compression_type != compression_type)
- rewrite_ptrs |= 1U << i;
- i++;
- }
- }
-incompressible:
- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
- unsigned i = 0;
-
- bkey_for_each_ptr(ptrs, ptr) {
- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target))
- rewrite_ptrs |= 1U << i;
- i++;
- }
- }
-
- return rewrite_ptrs;
-}
-
-bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
-{
- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
-
- /*
- * If it's an indirect extent, we don't delete the rebalance entry when
- * done so that we know what options were applied - check if it still
- * needs work done:
- */
- if (r &&
- k.k->type == KEY_TYPE_reflink_v &&
- !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression))
- r = NULL;
-
- return r != NULL;
-}
-
-static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
- unsigned target, unsigned compression)
-{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- u64 sectors = 0;
-
- if (compression) {
- unsigned compression_type = bch2_compression_opt_to_type(compression);
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
- p.ptr.unwritten) {
- sectors = 0;
- goto incompressible;
- }
-
- if (!p.ptr.cached && p.crc.compression_type != compression_type)
- sectors += p.crc.compressed_size;
- }
- }
-incompressible:
- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target))
- sectors += p.crc.compressed_size;
- }
-
- return sectors;
-}
-
-u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
-{
- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
-
- return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0;
-}
-
-int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
- struct bch_io_opts *opts)
-{
- struct bkey_s k = bkey_i_to_s(_k);
- struct bch_extent_rebalance *r;
- unsigned target = opts->background_target;
- unsigned compression = background_compression(*opts);
- bool needs_rebalance;
-
- if (!bkey_extent_is_direct_data(k.k))
- return 0;
-
- /* get existing rebalance entry: */
- r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
- if (r) {
- if (k.k->type == KEY_TYPE_reflink_v) {
- /*
- * indirect extents: existing options take precedence,
- * so that we don't move extents back and forth if
- * they're referenced by different inodes with different
- * options:
- */
- if (r->target)
- target = r->target;
- if (r->compression)
- compression = r->compression;
- }
-
- r->target = target;
- r->compression = compression;
- }
-
- needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression);
-
- if (needs_rebalance && !r) {
- union bch_extent_entry *new = bkey_val_end(k);
-
- new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance;
- new->rebalance.compression = compression;
- new->rebalance.target = target;
- new->rebalance.unused = 0;
- k.k->u64s += extent_entry_u64s(new);
- } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
- /*
- * For indirect extents, don't delete the rebalance entry when
- * we're finished so that we know we specifically moved it or
- * compressed it to its current location/compression type
- */
- extent_entry_drop(k, (union bch_extent_entry *) r);
- }
-
- return 0;
-}
-
/* Generic extent code: */
int bch2_cut_front_s(struct bpos where, struct bkey_s k)
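The helpers deleted above (bch2_bkey_rebalance_opts() through bch2_bkey_set_needs_rebalance()) are not dropped outright: the new rebalance.h include at the top of this diff suggests they move in with the rest of the rebalance code. For reference, the main helper's contract: bit i of the mask returned by bch2_bkey_ptrs_need_rebalance() set means the i-th pointer should be rewritten. A hypothetical consumer, mirroring the iteration pattern of the deleted code:

/* Hypothetical caller: bit i of the mask corresponds to the i-th
 * pointer, in the same order bkey_for_each_ptr() visits them. */
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
unsigned rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
unsigned i = 0;

bkey_for_each_ptr(ptrs, ptr) {
	if (rewrite_ptrs & (1U << i))
		mark_for_rewrite(ptr);	/* mark_for_rewrite() is made up */
	i++;
}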
@@ -1610,7 +1506,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k)
case KEY_TYPE_reflink_p: {
struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
- le64_add_cpu(&p.v->idx, sub);
+ SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub);
break;
}
case KEY_TYPE_inline_data:
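Last, the reflink_p index is no longer bumped with a raw le64_add_cpu(); it goes through the REFLINK_P_IDX()/SET_REFLINK_P_IDX() accessors instead. That is the right shape once the index shares its 64-bit word with other bits, since a raw add could carry into the neighboring fields. A self-contained sketch of the accessor pattern (the 56-bit split is illustrative, not necessarily the real layout, and endianness conversion is elided):

#include <stdint.h>

#define IDX_BITS 56				/* illustrative split */
#define IDX_MASK ((1ULL << IDX_BITS) - 1)	/* low bits: index */

/* Read just the index out of the packed word. */
static inline uint64_t reflink_p_idx(uint64_t idx_flags)
{
	return idx_flags & IDX_MASK;
}

/* Write the index back without touching the high (flag) bits. */
static inline uint64_t set_reflink_p_idx(uint64_t idx_flags, uint64_t idx)
{
	return (idx_flags & ~IDX_MASK) | (idx & IDX_MASK);
}

So SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub), as in the hunk, reads the index, adds the cut amount, and writes it back masked.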