Diffstat (limited to 'fs/bcachefs/journal.c')
-rw-r--r--  fs/bcachefs/journal.c | 162
1 file changed, 117 insertions(+), 45 deletions(-)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 2dc0d60c1745..2cd20114b74b 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -217,6 +217,12 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq)
if (__bch2_journal_pin_put(j, seq))
bch2_journal_reclaim_fast(j);
bch2_journal_do_writes(j);
+
+ /*
+ * for __bch2_next_write_buffer_flush_journal_buf(), when quiescing an
+ * open journal entry
+ */
+ wake_up(&j->wait);
}
/*
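
The wake_up() added here pairs with the wait_event() this patch adds to
bch2_next_write_buffer_flush_journal_buf() further down: a caller quiescing an
open journal entry sleeps on j->wait and needs a wakeup when the final buf
reference is dropped. A minimal sketch of the pairing, using only names that
appear in this patch:

    /* waiter: re-evaluate the flush condition on every wakeup */
    wait_event(j->wait,
               (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq,
                                &blocked)) != ERR_PTR(-EAGAIN));

    /* waker: the last pin/reference on a journal buf just went away */
    wake_up(&j->wait);
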
@@ -251,6 +257,9 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
if (!__journal_entry_is_open(old))
return;
+ if (old.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)
+ old.cur_entry_offset = j->cur_entry_offset_if_blocked;
+
/* Close out old buffer: */
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
@@ -373,6 +382,10 @@ static int journal_entry_open(struct journal *j)
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return JOURNAL_ERR_max_in_flight;
+ if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX,
+ c, "cannot start: journal seq overflow"))
+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */
+
BUG_ON(!j->cur_entry_sectors);
buf->expires =
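
This is one half of a two-layer guard: bch2_fs_journal_start(), later in this
patch, refuses with -EINVAL when a filesystem is mounted with cur_seq already
at JOURNAL_SEQ_MAX, while this check catches a running filesystem crossing the
limit. bch2_fs_fatal_err_on() flips the filesystem to emergency read-only, and
JOURNAL_ERR_insufficient_devices is reused purely for its -EROFS mapping, per
the inline comment; no new journal entry can be opened past this point.
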
@@ -664,7 +677,7 @@ out:
* @seq: seq to flush
* @parent: closure object to wait with
* Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed,
- * -EIO if @seq will never be flushed
+ * -BCH_ERR_journal_flush_err if @seq will never be flushed
*
* Like bch2_journal_wait_on_seq, except that it triggers a write immediately if
* necessary
@@ -687,7 +700,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
/* Recheck under lock: */
if (j->err_seq && seq >= j->err_seq) {
- ret = -EIO;
+ ret = -BCH_ERR_journal_flush_err;
goto out;
}
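
Callers that previously compared against -EIO can now match on the error class
instead; assuming journal_flush_err is declared under the EIO class in
errcode.h, a call site might look like:

    /* illustrative call site, not taken from this patch */
    ret = bch2_journal_flush_seq_async(j, seq, parent);
    if (ret < 0 && bch2_err_matches(ret, EIO)) {
        /* seq lies at or past j->err_seq and will never be flushed */
    }

The private code keeps the precise cause visible in error messages while still
collapsing to a standard errno at the filesystem boundary.
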
@@ -794,10 +807,11 @@ int bch2_journal_flush(struct journal *j)
}
/*
- * bch2_journal_noflush_seq - tell the journal not to issue any flushes before
+ * bch2_journal_noflush_seq - ask the journal not to issue any flushes in the
+ * range [start, end)
* @seq
*/
-bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
+bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
u64 unwritten_seq;
@@ -806,15 +820,15 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush)))
return false;
- if (seq <= c->journal.flushed_seq_ondisk)
+ if (c->journal.flushed_seq_ondisk >= start)
return false;
spin_lock(&j->lock);
- if (seq <= c->journal.flushed_seq_ondisk)
+ if (c->journal.flushed_seq_ondisk >= start)
goto out;
for (unwritten_seq = journal_last_unwritten_seq(j);
- unwritten_seq < seq;
+ unwritten_seq < end;
unwritten_seq++) {
struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq);
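
With the half-open [start, end) convention, end is the first sequence number
not affected. A caller that wants to suppress the flush for exactly one
journal entry would now write something like (illustrative, not a call site
from this patch):

    /* mark journal seq 'seq', and only 'seq', as noflush: */
    bool marked = bch2_journal_noflush_seq(j, seq, seq + 1);
    /* true => that entry's write will be issued without a flush/FUA */
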
@@ -831,19 +845,14 @@ out:
return ret;
}
-int bch2_journal_meta(struct journal *j)
+static int __bch2_journal_meta(struct journal *j)
{
- struct journal_buf *buf;
- struct journal_res res;
- int ret;
-
- memset(&res, 0, sizeof(res));
-
- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ struct journal_res res = {};
+ int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
if (ret)
return ret;
- buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
+ struct journal_buf *buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
buf->must_flush = true;
if (!buf->flush_time) {
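
This hunk and the next split the old entry point in two: __bch2_journal_meta()
does the work, and a new bch2_journal_meta() wrapper takes a journal write ref
first, failing with -EROFS if the filesystem is transitioning to read-only.
Shutdown paths that run after write refs are retired call the __-prefixed
variant directly, which is exactly what bch2_fs_journal_stop() switches to
further down. Sketch of the intended split:

    /* normal, sleepable context: go through the guarded wrapper */
    ret = bch2_journal_meta(&c->journal);   /* may return -EROFS */

    /* shutdown path, write refs already dead: call the internals */
    __bch2_journal_meta(j);
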
@@ -856,27 +865,70 @@ int bch2_journal_meta(struct journal *j)
return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE);
}
+int bch2_journal_meta(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal))
+ return -EROFS;
+
+ int ret = __bch2_journal_meta(j);
+ bch2_write_ref_put(c, BCH_WRITE_REF_journal);
+ return ret;
+}
+
/* block/unlock the journal: */
void bch2_journal_unblock(struct journal *j)
{
spin_lock(&j->lock);
- j->blocked--;
+ if (!--j->blocked &&
+ j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL &&
+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) {
+ union journal_res_state old, new;
+
+ old.v = atomic64_read(&j->reservations.counter);
+ do {
+ new.v = old.v;
+ new.cur_entry_offset = j->cur_entry_offset_if_blocked;
+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
+ }
spin_unlock(&j->lock);
journal_wake(j);
}
+static void __bch2_journal_block(struct journal *j)
+{
+ if (!j->blocked++) {
+ union journal_res_state old, new;
+
+ old.v = atomic64_read(&j->reservations.counter);
+ do {
+ j->cur_entry_offset_if_blocked = old.cur_entry_offset;
+
+ if (j->cur_entry_offset_if_blocked >= JOURNAL_ENTRY_CLOSED_VAL)
+ break;
+
+ new.v = old.v;
+ new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL;
+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
+
+ journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
+ }
+}
+
void bch2_journal_block(struct journal *j)
{
spin_lock(&j->lock);
- j->blocked++;
+ __bch2_journal_block(j);
spin_unlock(&j->lock);
journal_quiesce(j);
}
-static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
+static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j,
+ u64 max_seq, bool *blocked)
{
struct journal_buf *ret = NULL;
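
The blocking machinery above turns j->blocked into a proper state machine: on
the first block, the open entry's offset is parked in
j->cur_entry_offset_if_blocked and the JOURNAL_ENTRY_BLOCKED_VAL sentinel is
swapped into the packed reservation state, so no new reservations can land; on
the last unblock, the saved offset is swapped back (and __journal_entry_close(),
patched earlier, substitutes the saved offset so a blocked entry still closes
with the correct u64s count). The update itself is the standard lock-free
read-modify-CAS loop on a packed 64-bit word; atomic64_try_cmpxchg() reloads
old.v with the current value on failure, so each retry recomputes new.v from
fresh state:

    old.v = atomic64_read(&j->reservations.counter);
    do {
        /* recompute the desired state from the freshly observed old.v */
        new.v = old.v;
        new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL;
    } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
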
@@ -893,13 +945,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
struct journal_buf *buf = j->buf + idx;
if (buf->need_flush_to_write_buffer) {
- if (seq == journal_cur_seq(j))
- __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
-
union journal_res_state s;
s.v = atomic64_read_acquire(&j->reservations.counter);
- ret = journal_state_count(s, idx)
+ unsigned open = seq == journal_cur_seq(j) && __journal_entry_is_open(s);
+
+ if (open && !*blocked) {
+ __bch2_journal_block(j);
+ *blocked = true;
+ }
+
+ ret = journal_state_count(s, idx) > open
? ERR_PTR(-EAGAIN)
: buf;
break;
@@ -912,11 +968,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
return ret;
}
-struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j,
+ u64 max_seq, bool *blocked)
{
struct journal_buf *ret;
+ *blocked = false;
+
+ wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j,
+ max_seq, blocked)) != ERR_PTR(-EAGAIN));
+ if (IS_ERR_OR_NULL(ret) && *blocked)
+ bch2_journal_unblock(j);
- wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN));
return ret;
}
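
The new out-parameter documents an ownership rule: the helper may block the
journal on the caller's behalf (to quiesce a still-open entry), and on the
error/NULL paths it unblocks before returning, but on success the caller owns
the unblock. An illustrative caller, presumably resembling the write buffer
flush path this function serves:

    bool blocked;
    struct journal_buf *buf =
        bch2_next_write_buffer_flush_journal_buf(j, max_seq, &blocked);

    if (!IS_ERR_OR_NULL(buf)) {
        /* ... flush buf's keys to the btree write buffer ... */
        if (blocked)
            bch2_journal_unblock(j);
    }
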
@@ -945,19 +1007,17 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
}
for (nr_got = 0; nr_got < nr_want; nr_got++) {
- if (new_fs) {
- bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
- if (bu[nr_got] < 0) {
- ret = -BCH_ERR_ENOSPC_bucket_alloc;
- break;
- }
- } else {
- ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal,
- BCH_DATA_journal, cl);
- ret = PTR_ERR_OR_ZERO(ob[nr_got]);
- if (ret)
- break;
+ enum bch_watermark watermark = new_fs
+ ? BCH_WATERMARK_btree
+ : BCH_WATERMARK_normal;
+ ob[nr_got] = bch2_bucket_alloc(c, ca, watermark,
+ BCH_DATA_journal, cl);
+ ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+ if (ret)
+ break;
+
+ if (!new_fs) {
ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(trans, ca,
ob[nr_got]->bucket, BCH_DATA_journal,
@@ -967,9 +1027,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bch_err_msg(c, ret, "marking new journal buckets");
break;
}
-
- bu[nr_got] = ob[nr_got]->bucket;
}
+
+ bu[nr_got] = ob[nr_got]->bucket;
}
if (!nr_got)
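
Folding the new-fs case into bch2_bucket_alloc(), differing only by watermark
(a fresh filesystem has nothing to reclaim, so it may dip into the btree
reserve), means both paths now produce open_buckets. That is why
bu[nr_got] = ob[nr_got]->bucket moves out of the else branch here, and why the
err_free path below can put the open buckets unconditionally; presumably this
also clears the way to retiring bch2_bucket_alloc_new_fs() entirely.
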
@@ -1009,8 +1069,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (ret)
goto err_unblock;
- if (!new_fs)
- bch2_write_super(c);
+ bch2_write_super(c);
/* Commit: */
if (c)
@@ -1044,9 +1103,8 @@ err_unblock:
bu[i], BCH_DATA_free, 0,
BTREE_TRIGGER_transactional));
err_free:
- if (!new_fs)
- for (i = 0; i < nr_got; i++)
- bch2_open_bucket_put(c, ob[i]);
+ for (i = 0; i < nr_got; i++)
+ bch2_open_bucket_put(c, ob[i]);
kfree(new_bucket_seq);
kfree(new_buckets);
@@ -1193,7 +1251,7 @@ void bch2_fs_journal_stop(struct journal *j)
* Always write a new journal entry, to make sure the clock hands are up
* to date (and match the superblock)
*/
- bch2_journal_meta(j);
+ __bch2_journal_meta(j);
journal_quiesce(j);
cancel_delayed_work_sync(&j->write_work);
@@ -1217,6 +1275,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
bool had_entries = false;
u64 last_seq = cur_seq, nr, seq;
+ if (cur_seq >= JOURNAL_SEQ_MAX) {
+ bch_err(c, "cannot start: journal seq overflow");
+ return -EINVAL;
+ }
+
genradix_for_each_reverse(&c->journal_entries, iter, _i) {
i = *_i;
@@ -1474,6 +1537,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
case JOURNAL_ENTRY_CLOSED_VAL:
prt_printf(out, "closed\n");
break;
+ case JOURNAL_ENTRY_BLOCKED_VAL:
+ prt_printf(out, "blocked\n");
+ break;
default:
prt_printf(out, "%u/%u\n", s.cur_entry_offset, j->cur_entry_u64s);
break;
@@ -1499,6 +1565,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
printbuf_indent_sub(out, 2);
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
+ if (!ca->mi.durability)
+ continue;
+
struct journal_device *ja = &ca->journal;
if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d))
@@ -1508,6 +1577,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
continue;
prt_printf(out, "dev %u:\n", ca->dev_idx);
+ prt_printf(out, "durability %u:\n", ca->mi.durability);
printbuf_indent_add(out, 2);
prt_printf(out, "nr\t%u\n", ja->nr);
prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size);
@@ -1519,6 +1589,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
printbuf_indent_sub(out, 2);
}
+ prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required);
+
rcu_read_unlock();
--out->atomic;