Diffstat (limited to 'fs/bcachefs/io_write.c')
-rw-r--r--  fs/bcachefs/io_write.c  301
1 file changed, 175 insertions(+), 126 deletions(-)
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index f137252bccc5..03892388832b 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -69,11 +69,10 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
u64 io_latency = time_after64(now, submit_time)
? now - submit_time
: 0;
- u64 old, new, v = atomic64_read(latency);
+ u64 old, new;
+ old = atomic64_read(latency);
do {
- old = v;
-
/*
* If the io latency was reasonably close to the current
* latency, skip doing the update and atomic operation - most of
@@ -84,7 +83,7 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
break;
new = ewma_add(old, io_latency, 5);
- } while ((v = atomic64_cmpxchg(latency, old, new)) != old);
+ } while (!atomic64_try_cmpxchg(latency, &old, new));
bch2_congested_acct(ca, io_latency, now, rw);
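Aside: the hunk above converts the open-coded cmpxchg loop to the try_cmpxchg idiom, where the helper reloads `old` on failure, so the explicit `old = v` assignment disappears. A rough userspace sketch of the loop shape, using C11 atomics in place of atomic64_try_cmpxchg() (the ewma_add() weighting below is an approximation, not the bcachefs macro, and the "latency close to the average" early-out is elided):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* approximate exponentially weighted moving average, weight = 2^w */
static uint64_t ewma_add(uint64_t ewma, uint64_t val, unsigned w)
{
	return ((ewma << w) - ewma + val) >> w;
}

static void latency_acct(_Atomic uint64_t *latency, uint64_t io_latency)
{
	uint64_t old = atomic_load(latency);
	uint64_t new;

	do {
		/* the kernel code breaks out early here when io_latency is
		 * already close to old; that check is not reproduced */
		new = ewma_add(old, io_latency, 5);
		/* on failure, 'old' is reloaded with the current value */
	} while (!atomic_compare_exchange_weak(latency, &old, new));
}

int main(void)
{
	_Atomic uint64_t latency = 1000;

	latency_acct(&latency, 4000);
	printf("ewma latency: %llu\n", (unsigned long long) atomic_load(&latency));
	return 0;
}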
@@ -165,8 +164,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
bch2_trans_copy_iter(&iter, extent_iter);
- for_each_btree_key_upto_continue_norestart(iter,
- new->k.p, BTREE_ITER_SLOTS, old, ret) {
+ for_each_btree_key_max_continue_norestart(iter,
+ new->k.p, BTREE_ITER_slots, old, ret) {
s64 sectors = min(new->k.p.offset, old.k->p.offset) -
max(bkey_start_offset(&new->k),
bkey_start_offset(old.k));
@@ -199,9 +198,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
u64 new_i_size,
s64 i_sectors_delta)
{
- struct btree_iter iter;
- struct bkey_i *k;
- struct bkey_i_inode_v3 *inode;
/*
* Crazy performance optimization:
* Every extent update needs to also update the inode: the inode trigger
@@ -213,26 +209,38 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
* to be journalled - if we crash, the bi_journal_seq update will be
* lost, but that's fine.
*/
- unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
- int ret;
+ unsigned inode_update_flags = BTREE_UPDATE_nojournal;
- k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0,
extent_iter->pos.inode,
extent_iter->snapshot),
- BTREE_ITER_CACHED);
- ret = PTR_ERR_OR_ZERO(k);
+ BTREE_ITER_intent|
+ BTREE_ITER_cached);
+ int ret = bkey_err(k);
if (unlikely(ret))
return ret;
- if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
- k = bch2_inode_to_v3(trans, k);
- ret = PTR_ERR_OR_ZERO(k);
+ /*
+ * varint_decode_fast(), in the inode .invalid method, reads up to 7
+ * bytes past the end of the buffer:
+ */
+ struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8);
+ ret = PTR_ERR_OR_ZERO(k_mut);
+ if (unlikely(ret))
+ goto err;
+
+ bkey_reassemble(k_mut, k);
+
+ if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) {
+ k_mut = bch2_inode_to_v3(trans, k_mut);
+ ret = PTR_ERR_OR_ZERO(k_mut);
if (unlikely(ret))
goto err;
}
- inode = bkey_i_to_inode_v3(k);
+ struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut);
if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
new_i_size > le64_to_cpu(inode->v.bi_size)) {
@@ -251,7 +259,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
}
ret = bch2_trans_update(trans, &iter, &inode->k_i,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ BTREE_UPDATE_internal_snapshot_node|
inode_update_flags);
err:
bch2_trans_iter_exit(trans, &iter);
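Aside: the rewritten inode update reads the key with a normal iterator and then copies it into a transaction buffer padded by 8 bytes, because varint_decode_fast() in the inode validation path can read up to 7 bytes past the end of the value. A minimal userspace sketch of that defensive over-allocation (decode_fast() below is a stand-in, not the bcachefs varint code):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* stand-in for a word-at-a-time decoder that always loads 8 bytes */
static uint64_t decode_fast(const uint8_t *p)
{
	uint64_t word;

	memcpy(&word, p, sizeof(word));
	return word;
}

int main(void)
{
	const uint8_t value[3] = { 0x01, 0x02, 0x03 };

	/*
	 * Pad the copy by 8 bytes so the decoder's overread stays inside the
	 * allocation -- the same reason the hunk allocates
	 * bkey_bytes(k.k) + 8 before bkey_reassemble().
	 */
	uint8_t *buf = calloc(1, sizeof(value) + 8);
	if (!buf)
		return 1;

	memcpy(buf, value, sizeof(value));
	printf("decoded word: 0x%016llx\n", (unsigned long long) decode_fast(buf));

	free(buf);
	return 0;
}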
@@ -360,9 +368,9 @@ static int bch2_write_index_default(struct bch_write_op *op)
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
bkey_start_pos(&sk.k->k),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ BTREE_ITER_slots|BTREE_ITER_intent);
- ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?:
+ ret = bch2_bkey_set_needs_rebalance(c, &op->opts, sk.k) ?:
bch2_extent_update(trans, inum, &iter, sk.k,
&op->res,
op->new_i_size, &op->i_sectors_delta,
@@ -388,6 +396,31 @@ static int bch2_write_index_default(struct bch_write_op *op)
/* Writes */
+static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
+ u64 offset)
+{
+ bch2_inum_offset_err_msg(op->c, out,
+ (subvol_inum) { op->subvol, op->pos.inode, },
+ offset << 9);
+ prt_printf(out, "write error%s: ",
+ op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
+}
+
+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
+{
+ __bch2_write_op_error(out, op, op->pos.offset);
+}
+
+static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
+ struct bch_write_op *op, u64 offset)
+{
+ bch2_inum_offset_err_msg_trans(trans, out,
+ (subvol_inum) { op->subvol, op->pos.inode, },
+ offset << 9);
+ prt_printf(out, "write error%s: ",
+ op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
+}
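These helpers replace the old bch_err_inum_offset_ratelimited() call sites; the later hunks all consume them with the same printbuf pattern, roughly (excerpted from the __bch2_write_index hunk below):

	struct printbuf buf = PRINTBUF;

	bch2_write_op_error(&buf, op);
	prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
	bch_err_ratelimited(c, "%s", buf.buf);
	printbuf_exit(&buf);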
+
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
enum bch_data_type type,
const struct bkey_i *k,
@@ -399,13 +432,12 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
BUG_ON(c->opts.nochanges);
bkey_for_each_ptr(ptrs, ptr) {
- BUG_ON(!bch2_dev_exists2(c, ptr->dev));
-
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bch_dev *ca = nocow
+ ? bch2_dev_have_ref(c, ptr->dev)
+ : bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? READ : WRITE);
if (to_entry(ptr + 1) < ptrs.end) {
- n = to_wbio(bio_alloc_clone(NULL, &wbio->bio,
- GFP_NOFS, &ca->replica_set));
+ n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOFS, &c->replica_set));
n->bio.bi_end_io = wbio->bio.bi_end_io;
n->bio.bi_private = wbio->bio.bi_private;
@@ -422,11 +454,12 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
n->c = c;
n->dev = ptr->dev;
- n->have_ioref = nocow || bch2_dev_get_ioref(ca,
- type == BCH_DATA_btree ? READ : WRITE);
+ n->have_ioref = ca != NULL;
n->nocow = nocow;
n->submit_time = local_clock();
n->inode_offset = bkey_start_offset(&k->k);
+ if (nocow)
+ n->nocow_bucket = PTR_BUCKET_NR(ca, ptr);
n->bio.bi_iter.bi_sector = ptr->offset;
if (likely(n->have_ioref)) {
@@ -473,7 +506,6 @@ static void bch2_write_done(struct closure *cl)
static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
{
struct keylist *keys = &op->insert_keys;
- struct bch_extent_ptr *ptr;
struct bkey_i *src, *dst = keys->keys, *n;
for (src = keys->keys; src != keys->top; src = n) {
@@ -525,14 +557,14 @@ static void __bch2_write_index(struct bch_write_op *op)
op->written += sectors_start - keylist_sectors(keys);
- if (ret && !bch2_err_matches(ret, EROFS)) {
+ if (unlikely(ret && !bch2_err_matches(ret, EROFS))) {
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
- bch_err_inum_offset_ratelimited(c,
- insert->k.p.inode, insert->k.p.offset << 9,
- "%s write error while doing btree update: %s",
- op->flags & BCH_WRITE_MOVE ? "move" : "user",
- bch2_err_str(ret));
+ struct printbuf buf = PRINTBUF;
+ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
+ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
}
if (ret)
@@ -548,7 +580,7 @@ out:
err:
keys->top = keys->keys;
op->error = ret;
- op->flags |= BCH_WRITE_DONE;
+ op->flags |= BCH_WRITE_SUBMITTED;
goto out;
}
@@ -583,7 +615,7 @@ static CLOSURE_CALLBACK(bch2_write_index)
struct workqueue_struct *wq = index_update_wq(op);
unsigned long flags;
- if ((op->flags & BCH_WRITE_DONE) &&
+ if ((op->flags & BCH_WRITE_SUBMITTED) &&
(op->flags & BCH_WRITE_MOVE))
bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
@@ -615,9 +647,7 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
while (1) {
spin_lock_irq(&wp->writes_lock);
- op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
- if (op)
- list_del(&op->wp_list);
+ op = list_pop_entry(&wp->writes, struct bch_write_op, wp_list);
wp_update_state(wp, op != NULL);
spin_unlock_irq(&wp->writes_lock);
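Aside: list_pop_entry() here replaces the list_first_entry_or_null() + list_del() pair. Judging only from the code it replaces, it should be equivalent to the sketch below (list_pop_entry_sketch is an illustrative name, not a real kernel macro):

#define list_pop_entry_sketch(head, type, member)			\
({									\
	type *_ret = list_first_entry_or_null(head, type, member);	\
									\
	if (_ret)							\
		list_del(&_ret->member);				\
	_ret;								\
})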
@@ -628,7 +658,7 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
__bch2_write_index(op);
- if (!(op->flags & BCH_WRITE_DONE))
+ if (!(op->flags & BCH_WRITE_SUBMITTED))
__bch2_write(op);
else
bch2_write_done(&op->cl);
@@ -642,7 +672,9 @@ static void bch2_write_endio(struct bio *bio)
struct bch_write_bio *wbio = to_wbio(bio);
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
struct bch_fs *c = wbio->c;
- struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
+ struct bch_dev *ca = wbio->have_ioref
+ ? bch2_dev_have_ref(c, wbio->dev)
+ : NULL;
if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
op->pos.inode,
@@ -653,8 +685,12 @@ static void bch2_write_endio(struct bio *bio)
op->flags |= BCH_WRITE_IO_ERROR;
}
- if (wbio->nocow)
+ if (wbio->nocow) {
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ POS(ca->dev_idx, wbio->nocow_bucket),
+ BUCKET_NOCOW_LOCK_UPDATE);
set_bit(wbio->dev, op->devs_need_flush->d);
+ }
if (wbio->have_ioref) {
bch2_latency_acct(ca, wbio->submit_time, WRITE);
@@ -685,7 +721,7 @@ static void init_append_extent(struct bch_write_op *op,
e = bkey_extent_init(op->insert_keys.top);
e->k.p = op->pos;
e->k.size = crc.uncompressed_size;
- e->k.version = version;
+ e->k.bversion = version;
if (crc.csum_type ||
crc.compression_type ||
@@ -847,7 +883,7 @@ static enum prep_encoded_ret {
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return PREP_ENCODED_CHECKSUM_ERR;
- if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
+ if (bch2_bio_uncompress_inplace(op, bio))
return PREP_ENCODED_ERR;
}
@@ -1068,8 +1104,14 @@ do_write:
*_dst = dst;
return more;
csum_err:
- bch_err(c, "%s writ error: error verifying existing checksum while rewriting existing data (memory corruption?)",
- op->flags & BCH_WRITE_MOVE ? "move" : "user");
+ {
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error(&buf, op);
+ prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)");
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
ret = -EIO;
err:
if (to_wbio(dst)->bounce)
@@ -1093,30 +1135,21 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op,
return false;
e = bkey_s_c_to_extent(k);
+
+ rcu_read_lock();
extent_for_each_ptr_decode(e, p, entry) {
- if (crc_is_encoded(p.crc) || p.has_ec)
+ if (crc_is_encoded(p.crc) || p.has_ec) {
+ rcu_read_unlock();
return false;
+ }
replicas += bch2_extent_ptr_durability(c, &p);
}
+ rcu_read_unlock();
return replicas >= op->opts.data_replicas;
}
-static inline void bch2_nocow_write_unlock(struct bch_write_op *op)
-{
- struct bch_fs *c = op->c;
-
- for_each_keylist_key(&op->insert_keys, k) {
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
-
- bkey_for_each_ptr(ptrs, ptr)
- bch2_bucket_nocow_unlock(&c->nocow_locks,
- PTR_BUCKET_POS(c, ptr),
- BUCKET_NOCOW_LOCK_UPDATE);
- }
-}
-
static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *orig,
@@ -1150,7 +1183,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
return bch2_extent_update_i_size_sectors(trans, iter,
min(new->k.p.offset << 9, new_i_size), 0) ?:
bch2_trans_update(trans, iter, new,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ BTREE_UPDATE_internal_snapshot_node);
}
static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
@@ -1159,9 +1192,9 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
struct btree_trans *trans = bch2_trans_get(c);
for_each_keylist_key(&op->insert_keys, orig) {
- int ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents,
+ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
bkey_start_pos(&orig->k), orig->k.p,
- BTREE_ITER_INTENT, k,
+ BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
}));
@@ -1169,11 +1202,11 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
if (ret && !bch2_err_matches(ret, EROFS)) {
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
- bch_err_inum_offset_ratelimited(c,
- insert->k.p.inode, insert->k.p.offset << 9,
- "%s write error while doing btree update: %s",
- op->flags & BCH_WRITE_MOVE ? "move" : "user",
- bch2_err_str(ret));
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k));
+ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
}
if (ret) {
@@ -1187,8 +1220,6 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
static void __bch2_nocow_write_done(struct bch_write_op *op)
{
- bch2_nocow_write_unlock(op);
-
if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
op->error = -EIO;
} else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN))
@@ -1218,7 +1249,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
u32 snapshot;
struct bucket_to_lock *stale_at;
- int ret;
+ int stale, ret;
if (op->flags & BCH_WRITE_MOVE)
return;
@@ -1234,12 +1265,16 @@ retry:
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(op->pos.inode, op->pos.offset, snapshot),
- BTREE_ITER_SLOTS);
+ BTREE_ITER_slots);
while (1) {
struct bio *bio = &op->wbio.bio;
buckets.nr = 0;
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ break;
+
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
@@ -1259,14 +1294,15 @@ retry:
/* Get iorefs before dropping btree locks: */
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
- struct bpos b = PTR_BUCKET_POS(c, ptr);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
+ if (unlikely(!ca))
+ goto err_get_ioref;
+
+ struct bpos b = PTR_BUCKET_POS(ca, ptr);
struct nocow_lock_bucket *l =
bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
prefetch(l);
- if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
- goto err_get_ioref;
-
/* XXX allocating memory with btree locks held - rare */
darray_push_gfp(&buckets, ((struct bucket_to_lock) {
.b = b, .gen = ptr->gen, .l = l,
@@ -1285,16 +1321,14 @@ retry:
bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
darray_for_each(buckets, i) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);
+ struct bch_dev *ca = bch2_dev_have_ref(c, i->b.inode);
__bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
bucket_to_u64(i->b),
BUCKET_NOCOW_LOCK_UPDATE);
- rcu_read_lock();
- bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
- rcu_read_unlock();
-
+ int gen = bucket_gen_get(ca, i->b.offset);
+ stale = gen < 0 ? gen : gen_after(gen, i->gen);
if (unlikely(stale)) {
stale_at = i;
goto err_bucket_stale;
@@ -1308,7 +1342,7 @@ retry:
wbio_init(bio)->put_bio = true;
bio->bi_opf = op->wbio.bio.bi_opf;
} else {
- op->flags |= BCH_WRITE_DONE;
+ op->flags |= BCH_WRITE_SUBMITTED;
}
op->pos.offset += bio_sectors(bio);
@@ -1322,7 +1356,7 @@ retry:
op->insert_keys.top, true);
bch2_keylist_push(&op->insert_keys);
- if (op->flags & BCH_WRITE_DONE)
+ if (op->flags & BCH_WRITE_SUBMITTED)
break;
bch2_btree_iter_advance(&iter);
}
@@ -1332,19 +1366,21 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
+ bch2_trans_put(trans);
+ darray_exit(&buckets);
+
if (ret) {
- bch_err_inum_offset_ratelimited(c,
- op->pos.inode, op->pos.offset << 9,
- "%s: btree lookup error %s", __func__, bch2_err_str(ret));
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error(&buf, op);
+ prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
op->error = ret;
- op->flags |= BCH_WRITE_DONE;
+ op->flags |= BCH_WRITE_SUBMITTED;
}
- bch2_trans_put(trans);
- darray_exit(&buckets);
-
/* fallback to cow write path? */
- if (!(op->flags & BCH_WRITE_DONE)) {
+ if (!(op->flags & BCH_WRITE_SUBMITTED)) {
closure_sync(&op->cl);
__bch2_nocow_write_done(op);
op->insert_keys.top = op->insert_keys.keys;
@@ -1362,7 +1398,7 @@ err:
return;
err_get_ioref:
darray_for_each(buckets, i)
- percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);
+ percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref);
/* Fall back to COW path: */
goto out;
@@ -1373,8 +1409,18 @@ err_bucket_stale:
break;
}
- /* We can retry this: */
- ret = -BCH_ERR_transaction_restart;
+ struct printbuf buf = PRINTBUF;
+ if (bch2_fs_inconsistent_on(stale < 0, c,
+ "pointer to invalid bucket in nocow path on device %llu\n %s",
+ stale_at->b.inode,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = -EIO;
+ } else {
+ /* We can retry this: */
+ ret = -BCH_ERR_transaction_restart;
+ }
+ printbuf_exit(&buf);
+
goto err_get_ioref;
}
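Aside: the stale-bucket handling relies on bucket_gen_get() returning a negative error for a bucket that no longer exists, so `stale` carries three states: negative for an invalid bucket (reported as a filesystem inconsistency, -EIO), positive for a generation that moved on (retry the transaction), zero for still current. A small runnable sketch of that convention (gen_newer() is a stand-in for the kernel's gen_after(), not a copy of it):

#include <stdio.h>

/* stand-in for gen_after(): wraparound-safe "a is newer than b" */
static int gen_newer(unsigned char a, unsigned char b)
{
	return (signed char) (a - b) > 0;
}

int main(void)
{
	int gen = -5;			/* pretend bucket_gen_get() failed */
	unsigned char want = 200;	/* generation recorded in the pointer */

	int stale = gen < 0 ? gen : gen_newer((unsigned char) gen, want);

	if (stale < 0)
		printf("invalid bucket -> hard error (-EIO path)\n");
	else if (stale)
		printf("stale pointer -> retryable (transaction restart)\n");
	else
		printf("generation still current\n");
	return 0;
}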
@@ -1390,7 +1436,7 @@ static void __bch2_write(struct bch_write_op *op)
if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
bch2_nocow_write(op);
- if (op->flags & BCH_WRITE_DONE)
+ if (op->flags & BCH_WRITE_SUBMITTED)
goto out_nofs_restore;
}
again:
@@ -1417,7 +1463,7 @@ again:
* freeing up space on specific disks, which means that
* allocations for specific disks may hang arbitrarily long:
*/
- ret = bch2_trans_do(c, NULL, NULL, 0,
+ ret = bch2_trans_run(c, lockrestart_do(trans,
bch2_alloc_sectors_start_trans(trans,
op->target,
op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
@@ -1427,9 +1473,7 @@ again:
op->nr_replicas_required,
op->watermark,
op->flags,
- (op->flags & (BCH_WRITE_ALLOC_NOWAIT|
- BCH_WRITE_ONLY_SPECIFIED_DEVS))
- ? NULL : &op->cl, &wp));
+ &op->cl, &wp)));
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
break;
@@ -1445,16 +1489,16 @@ again:
bch2_alloc_sectors_done_inlined(c, wp);
err:
if (ret <= 0) {
- op->flags |= BCH_WRITE_DONE;
-
- if (ret < 0) {
- if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT))
- bch_err_inum_offset_ratelimited(c,
- op->pos.inode,
- op->pos.offset << 9,
- "%s(): %s error: %s", __func__,
- op->flags & BCH_WRITE_MOVE ? "move" : "user",
- bch2_err_str(ret));
+ op->flags |= BCH_WRITE_SUBMITTED;
+
+ if (unlikely(ret < 0)) {
+ if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error(&buf, op);
+ prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret));
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
op->error = ret;
break;
}
@@ -1481,12 +1525,13 @@ err:
* once, as that signals backpressure to the caller.
*/
if ((op->flags & BCH_WRITE_SYNC) ||
- (!(op->flags & BCH_WRITE_DONE) &&
+ (!(op->flags & BCH_WRITE_SUBMITTED) &&
!(op->flags & BCH_WRITE_IN_WORKER))) {
- closure_sync(&op->cl);
+ bch2_wait_on_allocator(c, &op->cl);
+
__bch2_write_index(op);
- if (!(op->flags & BCH_WRITE_DONE))
+ if (!(op->flags & BCH_WRITE_SUBMITTED))
goto again;
bch2_write_done(&op->cl);
} else {
@@ -1505,8 +1550,10 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
unsigned sectors;
int ret;
+ memset(&op->failed, 0, sizeof(op->failed));
+
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
- op->flags |= BCH_WRITE_DONE;
+ op->flags |= BCH_WRITE_SUBMITTED;
bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
@@ -1523,7 +1570,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
id = bkey_inline_data_init(op->insert_keys.top);
id->k.p = op->pos;
- id->k.version = op->version;
+ id->k.bversion = op->version;
id->k.size = sectors;
iter = bio->bi_iter;
@@ -1569,17 +1616,19 @@ CLOSURE_CALLBACK(bch2_write)
BUG_ON(!op->write_point.v);
BUG_ON(bkey_eq(op->pos, POS_MAX));
+ if (op->flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
+ op->flags |= BCH_WRITE_ALLOC_NOWAIT;
+
op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
op->start_time = local_clock();
bch2_keylist_init(&op->insert_keys, op->inline_keys);
wbio_init(bio)->put_bio = false;
- if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) {
- bch_err_inum_offset_ratelimited(c,
- op->pos.inode,
- op->pos.offset << 9,
- "%s write error: misaligned write",
- op->flags & BCH_WRITE_MOVE ? "move" : "user");
+ if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) {
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error(&buf, op);
+ prt_printf(&buf, "misaligned write");
+ printbuf_exit(&buf);
op->error = -EIO;
goto err;
}
@@ -1639,8 +1688,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_bitflags(out, bch2_write_flags, op->flags);
prt_newline(out);
- prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
- prt_newline(out);
+ prt_printf(out, "ref: %u\n", closure_nr_remaining(&op->cl));
printbuf_indent_sub(out, 2);
}
@@ -1648,13 +1696,14 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
void bch2_fs_io_write_exit(struct bch_fs *c)
{
mempool_exit(&c->bio_bounce_pages);
+ bioset_exit(&c->replica_set);
bioset_exit(&c->bio_write);
}
int bch2_fs_io_write_init(struct bch_fs *c)
{
- if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
- BIOSET_NEED_BVECS))
+ if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), BIOSET_NEED_BVECS) ||
+ bioset_init(&c->replica_set, 4, offsetof(struct bch_write_bio, bio), 0))
return -BCH_ERR_ENOMEM_bio_write_init;
if (mempool_init_page_pool(&c->bio_bounce_pages,