summary | refs | log | tree | commit | diff
path: root/fs/bcachefs/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/extents.c')
-rw-r--r-- fs/bcachefs/extents.c 409
1 files changed, 261 insertions, 148 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 1b25f84e4b9c..a864de231b69 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -13,6 +13,7 @@
#include "btree_iter.h"
#include "buckets.h"
#include "checksum.h"
+#include "compress.h"
#include "debug.h"
#include "disk_groups.h"
#include "error.h"
@@ -162,17 +163,19 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
/* KEY_TYPE_btree_ptr: */
-int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
- if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) {
- prt_printf(err, "value too big (%zu > %u)",
- bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
- return -BCH_ERR_invalid_bkey;
- }
+ int ret = 0; /* validation result for a KEY_TYPE_btree_ptr key; returned at fsck_err below */
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, /* value may not exceed BCH_REPLICAS_MAX u64s */
+ btree_ptr_val_too_big,
+ "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
- return bch2_bkey_ptrs_invalid(c, k, flags, err);
+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err); /* then validate the individual extent entries */
+fsck_err: /* NOTE(review): presumably the bail-out label bkey_fsck_err_on() jumps to (with ret set) - macro not visible here, confirm */
+ return ret;
}
void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
@@ -181,17 +184,20 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
bch2_bkey_ptrs_to_text(out, c, k);
}
-int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
- if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) {
- prt_printf(err, "value too big (%zu > %zu)",
- bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
- return -BCH_ERR_invalid_bkey;
- }
+ int ret = 0; /* validation result for a v2 btree pointer key; returned at fsck_err below */
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, /* value may not exceed BKEY_BTREE_PTR_VAL_U64s_MAX u64s */
+ btree_ptr_v2_val_too_big,
+ "value too big (%zu > %zu)",
+ bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
- return bch2_bkey_ptrs_invalid(c, k, flags, err);
+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err); /* then validate the individual extent entries */
+fsck_err: /* NOTE(review): presumably the bail-out label bkey_fsck_err_on() jumps to (with ret set) - macro not visible here, confirm */
+ return ret;
}
void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
@@ -372,19 +378,18 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
/* KEY_TYPE_reservation: */
-int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+ int ret = 0; /* validation result for a reservation key; returned at fsck_err below */
- if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) {
- prt_printf(err, "invalid nr_replicas (%u)",
- r.v->nr_replicas);
- return -BCH_ERR_invalid_bkey;
- }
-
- return 0;
+ bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, /* nr_replicas must lie in 1..BCH_REPLICAS_MAX */
+ reservation_key_nr_replicas_invalid,
+ "invalid nr_replicas (%u)", r.v->nr_replicas);
+fsck_err: /* NOTE(review): presumably the bail-out label bkey_fsck_err_on() jumps to (with ret set) - macro not visible here, confirm */
+ return ret;
}
void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
@@ -757,18 +762,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
return i;
}
-static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
-{
- union bch_extent_entry *next = extent_entry_next(entry);
-
- /* stripes have ptrs, but their layout doesn't work with this code */
- BUG_ON(k.k->type == KEY_TYPE_stripe);
-
- memmove_u64s_down(entry, next,
- (u64 *) bkey_val_end(k) - (u64 *) next);
- k.k->u64s -= (u64 *) next - (u64 *) entry;
-}
-
/*
* Returns pointer to the next entry after the one being dropped:
*/
@@ -992,10 +985,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
- struct bch_extent_crc_unpacked crc;
- const struct bch_extent_ptr *ptr;
- const struct bch_extent_stripe_ptr *ec;
- struct bch_dev *ca;
bool first = true;
if (c)
@@ -1006,9 +995,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " ");
switch (__extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_ptr:
- ptr = entry_to_ptr(entry);
- ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+ case BCH_EXTENT_ENTRY_ptr: {
+ const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
? bch_dev_bkey_exists(c, ptr->dev)
: NULL;
@@ -1030,10 +1019,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " stale");
}
break;
+ }
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
- case BCH_EXTENT_ENTRY_crc128:
- crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+ case BCH_EXTENT_ENTRY_crc128: {
+ struct bch_extent_crc_unpacked crc =
+ bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
crc.compressed_size,
@@ -1042,12 +1033,26 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
bch2_csum_types[crc.csum_type],
bch2_compression_types[crc.compression_type]);
break;
- case BCH_EXTENT_ENTRY_stripe_ptr:
- ec = &entry->stripe_ptr;
+ }
+ case BCH_EXTENT_ENTRY_stripe_ptr: {
+ const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
prt_printf(out, "ec: idx %llu block %u",
(u64) ec->idx, ec->block);
break;
+ }
+ case BCH_EXTENT_ENTRY_rebalance: {
+ const struct bch_extent_rebalance *r = &entry->rebalance;
+
+ prt_str(out, "rebalance: target ");
+ if (c)
+ bch2_target_to_text(out, c, r->target);
+ else
+ prt_printf(out, "%u", r->target);
+ prt_str(out, " compression ");
+ bch2_compression_opt_to_text(out, r->compression);
+ break;
+ }
default:
prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
return;
@@ -1057,7 +1062,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
}
}
-static int extent_ptr_invalid(const struct bch_fs *c,
+static int extent_ptr_invalid(struct bch_fs *c,
struct bkey_s_c k,
enum bkey_invalid_flags flags,
const struct bch_extent_ptr *ptr,
@@ -1070,6 +1075,7 @@ static int extent_ptr_invalid(const struct bch_fs *c,
u64 bucket;
u32 bucket_offset;
struct bch_dev *ca;
+ int ret = 0;
if (!bch2_dev_exists2(c, ptr->dev)) {
/*
@@ -1080,41 +1086,33 @@ static int extent_ptr_invalid(const struct bch_fs *c,
if (flags & BKEY_INVALID_WRITE)
return 0;
- prt_printf(err, "pointer to invalid device (%u)", ptr->dev);
- return -BCH_ERR_invalid_bkey;
+ bkey_fsck_err(c, err, ptr_to_invalid_device,
+ "pointer to invalid device (%u)", ptr->dev);
}
ca = bch_dev_bkey_exists(c, ptr->dev);
bkey_for_each_ptr(ptrs, ptr2)
- if (ptr != ptr2 && ptr->dev == ptr2->dev) {
- prt_printf(err, "multiple pointers to same device (%u)", ptr->dev);
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err,
+ ptr_to_duplicate_device,
+ "multiple pointers to same device (%u)", ptr->dev);
bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset);
- if (bucket >= ca->mi.nbuckets) {
- prt_printf(err, "pointer past last bucket (%llu > %llu)",
- bucket, ca->mi.nbuckets);
- return -BCH_ERR_invalid_bkey;
- }
-
- if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) {
- prt_printf(err, "pointer before first bucket (%llu < %u)",
- bucket, ca->mi.first_bucket);
- return -BCH_ERR_invalid_bkey;
- }
-
- if (bucket_offset + size_ondisk > ca->mi.bucket_size) {
- prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)",
+ bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err,
+ ptr_after_last_bucket,
+ "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets);
+ bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err,
+ ptr_before_first_bucket,
+ "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket);
+ bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err,
+ ptr_spans_multiple_buckets,
+ "pointer spans multiple buckets (%u + %u > %u)",
bucket_offset, size_ondisk, ca->mi.bucket_size);
- return -BCH_ERR_invalid_bkey;
- }
-
- return 0;
+fsck_err:
+ return ret;
}
-int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
@@ -1124,24 +1122,22 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned size_ondisk = k.k->size;
unsigned nonce = UINT_MAX;
unsigned nr_ptrs = 0;
- bool unwritten = false, have_ec = false, crc_since_last_ptr = false;
- int ret;
+ bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false;
+ int ret = 0;
if (bkey_is_btree_ptr(k.k))
size_ondisk = btree_sectors(c);
bkey_extent_entry_for_each(ptrs, entry) {
- if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) {
- prt_printf(err, "invalid extent entry type (got %u, max %u)",
- __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err,
+ extent_ptrs_invalid_entry,
+ "invalid extent entry type (got %u, max %u)",
+ __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
- if (bkey_is_btree_ptr(k.k) &&
- !extent_entry_is_ptr(entry)) {
- prt_printf(err, "has non ptr field");
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(bkey_is_btree_ptr(k.k) &&
+ !extent_entry_is_ptr(entry), c, err,
+ btree_ptr_has_non_ptr,
+ "has non ptr field");
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
@@ -1150,22 +1146,15 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
if (ret)
return ret;
- if (nr_ptrs && unwritten != entry->ptr.unwritten) {
- prt_printf(err, "extent with unwritten and written ptrs");
- return -BCH_ERR_invalid_bkey;
- }
-
- if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
- prt_printf(err, "has unwritten ptrs");
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err,
+ ptr_cached_and_erasure_coded,
+ "cached, erasure coded ptr");
- if (entry->ptr.cached && have_ec) {
- prt_printf(err, "cached, erasure coded ptr");
- return -BCH_ERR_invalid_bkey;
- }
+ if (!entry->ptr.unwritten)
+ have_written = true;
+ else
+ have_unwritten = true;
- unwritten = entry->ptr.unwritten;
have_ec = false;
crc_since_last_ptr = false;
nr_ptrs++;
@@ -1175,72 +1164,77 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- if (crc.offset + crc.live_size >
- crc.uncompressed_size) {
- prt_printf(err, "checksum offset + key size > uncompressed size");
- return -BCH_ERR_invalid_bkey;
- }
-
- size_ondisk = crc.compressed_size;
-
- if (!bch2_checksum_type_valid(c, crc.csum_type)) {
- prt_printf(err, "invalid checksum type");
- return -BCH_ERR_invalid_bkey;
- }
-
- if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) {
- prt_printf(err, "invalid compression type");
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err,
+ ptr_crc_uncompressed_size_too_small,
+ "checksum offset + key size > uncompressed size");
+ bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err,
+ ptr_crc_csum_type_unknown,
+ "invalid checksum type");
+ bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err,
+ ptr_crc_compression_type_unknown,
+ "invalid compression type");
if (bch2_csum_type_is_encryption(crc.csum_type)) {
if (nonce == UINT_MAX)
nonce = crc.offset + crc.nonce;
- else if (nonce != crc.offset + crc.nonce) {
- prt_printf(err, "incorrect nonce");
- return -BCH_ERR_invalid_bkey;
- }
+ else if (nonce != crc.offset + crc.nonce)
+ bkey_fsck_err(c, err, ptr_crc_nonce_mismatch,
+ "incorrect nonce");
}
- if (crc_since_last_ptr) {
- prt_printf(err, "redundant crc entry");
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(crc_since_last_ptr, c, err,
+ ptr_crc_redundant,
+ "redundant crc entry");
crc_since_last_ptr = true;
+
+ bkey_fsck_err_on(crc_is_encoded(crc) &&
+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
+ (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err,
+ ptr_crc_uncompressed_size_too_big,
+ "too large encoded extent");
+
+ size_ondisk = crc.compressed_size;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
- if (have_ec) {
- prt_printf(err, "redundant stripe entry");
- return -BCH_ERR_invalid_bkey;
- }
+ bkey_fsck_err_on(have_ec, c, err,
+ ptr_stripe_redundant,
+ "redundant stripe entry");
have_ec = true;
break;
- case BCH_EXTENT_ENTRY_rebalance:
+ case BCH_EXTENT_ENTRY_rebalance: {
+ const struct bch_extent_rebalance *r = &entry->rebalance;
+
+ if (!bch2_compression_opt_valid(r->compression)) {
+ struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
+ prt_printf(err, "invalid compression opt %u:%u",
+ opt.type, opt.level);
+ return -BCH_ERR_invalid_bkey;
+ }
break;
}
+ }
}
- if (!nr_ptrs) {
- prt_str(err, "no ptrs");
- return -BCH_ERR_invalid_bkey;
- }
-
- if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
- prt_str(err, "too many ptrs");
- return -BCH_ERR_invalid_bkey;
- }
-
- if (crc_since_last_ptr) {
- prt_printf(err, "redundant crc entry");
- return -BCH_ERR_invalid_bkey;
- }
-
- if (have_ec) {
- prt_printf(err, "redundant stripe entry");
- return -BCH_ERR_invalid_bkey;
- }
-
- return 0;
+ bkey_fsck_err_on(!nr_ptrs, c, err,
+ extent_ptrs_no_ptrs,
+ "no ptrs");
+ bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err,
+ extent_ptrs_too_many_ptrs,
+ "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX);
+ bkey_fsck_err_on(have_written && have_unwritten, c, err,
+ extent_ptrs_written_and_unwritten,
+ "extent with unwritten and written ptrs");
+ bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err,
+ extent_ptrs_unwritten,
+ "has unwritten ptrs");
+ bkey_fsck_err_on(crc_since_last_ptr, c, err,
+ extent_ptrs_redundant_crc,
+ "redundant crc entry");
+ bkey_fsck_err_on(have_ec, c, err,
+ extent_ptrs_redundant_stripe,
+ "redundant stripe entry");
+fsck_err:
+ return ret;
}
void bch2_ptr_swab(struct bkey_s k)
@@ -1281,6 +1275,125 @@ void bch2_ptr_swab(struct bkey_s k)
}
}
+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
+{ /* Return the first rebalance entry among k's extent entries, or NULL when there is none. */
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) /* first match wins; later entries are not examined */
+ return &entry->rebalance;
+
+ return NULL;
+}
+
+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+ unsigned target, unsigned compression)
+{ /* Return a bitmask of k's pointers to rewrite for the given target/compression: bit i = i-th pointer in iteration order; 0 = nothing to do. */
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned rewrite_ptrs = 0;
+
+ if (compression) { /* compression == 0 skips the compression pass */
+ unsigned compression_type = bch2_compression_opt_to_type(compression);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned i = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { /* one incompressible pointer cancels every bit set by this pass */
+ rewrite_ptrs = 0;
+ goto incompressible;
+ }
+
+ if (!p.ptr.cached && p.crc.compression_type != compression_type) /* cached pointers are never flagged */
+ rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+incompressible: /* the target pass below still runs after an incompressible bail-out */
+ if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { /* target == 0 skips the target pass */
+ const struct bch_extent_ptr *ptr;
+ unsigned i = 0;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) /* flag non-cached pointers whose device is outside the target */
+ rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+
+ return rewrite_ptrs;
+}
+
+bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{ /* True when k has a rebalance entry; for KEY_TYPE_reflink_v the entry must also still select pointers to rewrite. */
+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+ /*
+ * If it's an indirect extent, we don't delete the rebalance entry when
+ * done so that we know what options were applied - check if it still
+ * needs work done:
+ */
+ if (r &&
+ k.k->type == KEY_TYPE_reflink_v &&
+ !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) /* empty rewrite mask: entry is only a record of applied options */
+ r = NULL;
+
+ return r != NULL;
+}
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
+ unsigned target, unsigned compression)
+{ /* Sync _k's rebalance entry with target/compression: update an existing one, append one if pointers need rewriting, or drop it when no work is left. Always returns 0. */
+ struct bkey_s k = bkey_i_to_s(_k);
+ struct bch_extent_rebalance *r;
+ bool needs_rebalance;
+
+ if (!bkey_extent_is_direct_data(k.k)) /* keys failing this check are left untouched */
+ return 0;
+
+ /* get existing rebalance entry: */
+ r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); /* cast drops the const of the lookup result; _k itself is mutable */
+ if (r) {
+ if (k.k->type == KEY_TYPE_reflink_v) {
+ /*
+ * indirect extents: existing options take precedence,
+ * so that we don't move extents back and forth if
+ * they're referenced by different inodes with different
+ * options:
+ */
+ if (r->target)
+ target = r->target;
+ if (r->compression)
+ compression = r->compression;
+ }
+
+ r->target = target; /* write the (possibly overridden) options back into the existing entry */
+ r->compression = compression;
+ }
+
+ needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); /* non-zero rewrite mask converts to true */
+
+ if (needs_rebalance && !r) {
+ union bch_extent_entry *new = bkey_val_end(k); /* NOTE(review): writes past the current value end with no capacity check here - assumes the caller's buffer has room for one more entry; confirm */
+
+ new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance;
+ new->rebalance.compression = compression;
+ new->rebalance.target = target;
+ new->rebalance.unused = 0;
+ k.k->u64s += extent_entry_u64s(new); /* grow the key so it covers the appended entry */
+ } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
+ /*
+ * For indirect extents, don't delete the rebalance entry when
+ * we're finished so that we know we specifically moved it or
+ * compressed it to its current location/compression type
+ */
+ extent_entry_drop(k, (union bch_extent_entry *) r); /* NOTE(review): this patch deletes the static extent_entry_drop() from this file; presumably it is now provided by a header - confirm */
+ }
+
+ return 0;
+}
+
/* Generic extent code: */
int bch2_cut_front_s(struct bpos where, struct bkey_s k)