summaryrefslogtreecommitdiff
path: root/fs/bcachefs/error.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/error.c')
-rw-r--r--fs/bcachefs/error.c325
1 files changed, 276 insertions, 49 deletions
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index d32c8bebe46c..038da6a61f6b 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -1,6 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_iter.h"
#include "error.h"
+#include "fs-common.h"
+#include "journal.h"
+#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"
@@ -13,9 +18,11 @@ bool bch2_inconsistent_error(struct bch_fs *c)
switch (c->opts.errors) {
case BCH_ON_ERROR_continue:
return false;
+ case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
- bch_err(c, "inconsistency detected - emergency read only");
+ bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
+ journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
panic(bch2_fmt(c, "panic after error"));
@@ -25,11 +32,16 @@ bool bch2_inconsistent_error(struct bch_fs *c)
}
}
-void bch2_topology_error(struct bch_fs *c)
+int bch2_topology_error(struct bch_fs *c)
{
set_bit(BCH_FS_topology_error, &c->flags);
- if (!test_bit(BCH_FS_fsck_running, &c->flags))
+ if (!test_bit(BCH_FS_recovery_running, &c->flags)) {
bch2_inconsistent_error(c);
+ return -BCH_ERR_btree_need_topology_repair;
+ } else {
+ return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
+ -BCH_ERR_btree_node_read_validate_error;
+ }
}
void bch2_fatal_error(struct bch_fs *c)
@@ -89,7 +101,7 @@ static enum ask_yn parse_yn_response(char *buf)
}
#ifdef __KERNEL__
-static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)
{
struct stdio_redirect *stdio = c->stdio;
@@ -99,25 +111,44 @@ static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
if (!stdio)
return YN_NO;
- char buf[100];
+ if (trans)
+ bch2_trans_unlock(trans);
+
+ unsigned long unlock_long_at = trans ? jiffies + HZ * 2 : 0;
+ darray_char line = {};
int ret;
do {
+ unsigned long t;
bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
+rewait:
+ t = unlock_long_at
+ ? max_t(long, unlock_long_at - jiffies, 0)
+ : MAX_SCHEDULE_TIMEOUT;
+
+ int r = bch2_stdio_redirect_readline_timeout(stdio, &line, t);
+ if (r == -ETIME) {
+ bch2_trans_unlock_long(trans);
+ unlock_long_at = 0;
+ goto rewait;
+ }
- int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
- if (r < 0)
- return YN_NO;
- buf[r] = '\0';
- } while ((ret = parse_yn_response(buf)) < 0);
+ if (r < 0) {
+ ret = YN_NO;
+ break;
+ }
+
+ darray_last(line) = '\0';
+ } while ((ret = parse_yn_response(line.data)) < 0);
+ darray_exit(&line);
return ret;
}
#else
#include "tools-util.h"
-static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)
{
char *buf = NULL;
size_t buflen = 0;
@@ -168,20 +199,93 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
return s;
}
-int bch2_fsck_err(struct bch_fs *c,
+/* s/fix?/fixing/ s/recreate?/recreating/ */
+static void prt_actioning(struct printbuf *out, const char *action)
+{
+ unsigned len = strlen(action);
+
+ BUG_ON(action[len - 1] != '?');
+ --len;
+
+ if (action[len - 1] == 'e')
+ --len;
+
+ prt_bytes(out, action, len);
+ prt_str(out, "ing");
+}
+
+static const u8 fsck_flags_extra[] = {
+#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
+ BCH_SB_ERRS()
+#undef x
+};
+
+static int do_fsck_ask_yn(struct bch_fs *c,
+ struct btree_trans *trans,
+ struct printbuf *question,
+ const char *action)
+{
+ prt_str(question, ", ");
+ prt_str(question, action);
+
+ if (bch2_fs_stdio_redirect(c))
+ bch2_print(c, "%s", question->buf);
+ else
+ bch2_print_string_as_lines(KERN_ERR, question->buf);
+
+ int ask = bch2_fsck_ask_yn(c, trans);
+
+ if (trans) {
+ int ret = bch2_trans_relock(trans);
+ if (ret)
+ return ret;
+ }
+
+ return ask;
+}
+
+int __bch2_fsck_err(struct bch_fs *c,
+ struct btree_trans *trans,
enum bch_fsck_flags flags,
enum bch_sb_error_id err,
const char *fmt, ...)
{
struct fsck_err_state *s = NULL;
va_list args;
- bool print = true, suppressing = false, inconsistent = false;
+ bool print = true, suppressing = false, inconsistent = false, exiting = false;
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
-
- if ((flags & FSCK_CAN_FIX) &&
- test_bit(err, c->sb.errors_silent))
- return -BCH_ERR_fsck_fix;
+ const char *action_orig = "fix?", *action = action_orig;
+
+ might_sleep();
+
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ flags |= fsck_flags_extra[err];
+
+ if (!c)
+ c = trans->c;
+
+ /*
+ * Ugly: if there's a transaction in the current task it has to be
+ * passed in to unlock if we prompt for user input.
+ *
+ * But, plumbing a transaction and transaction restarts into
+ * bkey_validate() is problematic.
+ *
+ * So:
+ * - make all bkey errors AUTOFIX, they're simple anyways (we just
+ * delete the key)
+ * - and we don't need to warn if we're not prompting
+ */
+ WARN_ON((flags & FSCK_CAN_FIX) &&
+ !(flags & FSCK_AUTOFIX) &&
+ !trans &&
+ bch2_current_has_btree_trans(c));
+
+ if (test_bit(err, c->sb.errors_silent))
+ return flags & FSCK_CAN_FIX
+ ? -BCH_ERR_fsck_fix
+ : -BCH_ERR_fsck_ignore;
bch2_sb_error_count(c, err);
@@ -189,6 +293,19 @@ int bch2_fsck_err(struct bch_fs *c,
prt_vprintf(out, fmt, args);
va_end(args);
+ /* Custom fix/continue/recreate/etc.? */
+ if (out->buf[out->pos - 1] == '?') {
+ const char *p = strrchr(out->buf, ',');
+ if (p) {
+ out->pos = p - out->buf;
+ action = kstrdup(p + 2, GFP_KERNEL);
+ if (!action) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+ }
+
mutex_lock(&c->fsck_error_msgs_lock);
s = fsck_err_get(c, fmt);
if (s) {
@@ -199,13 +316,15 @@ int bch2_fsck_err(struct bch_fs *c,
*/
if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
ret = s->ret;
- mutex_unlock(&c->fsck_error_msgs_lock);
- printbuf_exit(&buf);
- return ret;
+ goto err_unlock;
}
kfree(s->last_msg);
s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
+ if (!s->last_msg) {
+ ret = -ENOMEM;
+ goto err_unlock;
+ }
if (c->opts.ratelimit_errors &&
!(flags & FSCK_NO_RATELIMIT) &&
@@ -224,14 +343,28 @@ int bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
- if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if ((flags & FSCK_AUTOFIX) &&
+ (c->opts.errors == BCH_ON_ERROR_continue ||
+ c->opts.errors == BCH_ON_ERROR_fix_safe)) {
+ prt_str(out, ", ");
+ if (flags & FSCK_CAN_FIX) {
+ prt_actioning(out, action);
+ ret = -BCH_ERR_fsck_fix;
+ } else {
+ prt_str(out, ", continuing");
+ ret = -BCH_ERR_fsck_ignore;
+ }
+
+ goto print;
+ } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");
inconsistent = true;
ret = -BCH_ERR_fsck_errors_not_fixed;
} else if (flags & FSCK_CAN_FIX) {
- prt_str(out, ", fixing");
+ prt_str(out, ", ");
+ prt_actioning(out, action);
ret = -BCH_ERR_fsck_fix;
} else {
prt_str(out, ", continuing");
@@ -246,36 +379,31 @@ int bch2_fsck_err(struct bch_fs *c,
: c->opts.fix_errors;
if (fix == FSCK_FIX_ask) {
- int ask;
-
- prt_str(out, ": fix?");
- if (bch2_fs_stdio_redirect(c))
- bch2_print(c, "%s", out->buf);
- else
- bch2_print_string_as_lines(KERN_ERR, out->buf);
print = false;
- ask = bch2_fsck_ask_yn(c);
+ ret = do_fsck_ask_yn(c, trans, out, action);
+ if (ret < 0)
+ goto err_unlock;
- if (ask >= YN_ALLNO && s)
- s->fix = ask == YN_ALLNO
+ if (ret >= YN_ALLNO && s)
+ s->fix = ret == YN_ALLNO
? FSCK_FIX_no
: FSCK_FIX_yes;
- ret = ask & 1
+ ret = ret & 1
? -BCH_ERR_fsck_fix
: -BCH_ERR_fsck_ignore;
} else if (fix == FSCK_FIX_yes ||
(c->opts.nochanges &&
!(flags & FSCK_CAN_IGNORE))) {
- prt_str(out, ", fixing");
+ prt_str(out, ", ");
+ prt_actioning(out, action);
ret = -BCH_ERR_fsck_fix;
} else {
- prt_str(out, ", not fixing");
+ prt_str(out, ", not ");
+ prt_actioning(out, action);
}
- } else if (flags & FSCK_NEED_FSCK) {
- prt_str(out, " (run fsck to correct)");
- } else {
+ } else if (!(flags & FSCK_CAN_IGNORE)) {
prt_str(out, " (repair unimplemented)");
}
@@ -284,6 +412,13 @@ int bch2_fsck_err(struct bch_fs *c,
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;
+ if (test_bit(BCH_FS_fsck_running, &c->flags) &&
+ (ret != -BCH_ERR_fsck_fix &&
+ ret != -BCH_ERR_fsck_ignore)) {
+ exiting = true;
+ print = true;
+ }
+print:
if (print) {
if (bch2_fs_stdio_redirect(c))
bch2_print(c, "%s\n", out->buf);
@@ -291,9 +426,7 @@ int bch2_fsck_err(struct bch_fs *c,
bch2_print_string_as_lines(KERN_ERR, out->buf);
}
- if (test_bit(BCH_FS_fsck_running, &c->flags) &&
- (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore))
+ if (exiting)
bch_err(c, "Unable to continue, halting");
else if (suppressing)
bch_err(c, "Ratelimiting new instances of previous error");
@@ -301,20 +434,81 @@ int bch2_fsck_err(struct bch_fs *c,
if (s)
s->ret = ret;
- mutex_unlock(&c->fsck_error_msgs_lock);
+ if (inconsistent)
+ bch2_inconsistent_error(c);
+ /*
+ * We don't yet track whether the filesystem currently has errors, for
+ * log_fsck_err()s: that would require us to track for every error type
+ * which recovery pass corrects it, to get the fsck exit status correct:
+ */
+ if (flags & FSCK_CAN_FIX) {
+ if (ret == -BCH_ERR_fsck_fix) {
+ set_bit(BCH_FS_errors_fixed, &c->flags);
+ } else {
+ set_bit(BCH_FS_errors_not_fixed, &c->flags);
+ set_bit(BCH_FS_error, &c->flags);
+ }
+ }
+err_unlock:
+ mutex_unlock(&c->fsck_error_msgs_lock);
+err:
+ if (action != action_orig)
+ kfree(action);
printbuf_exit(&buf);
+ return ret;
+}
- if (inconsistent)
- bch2_inconsistent_error(c);
+static const char * const bch2_bkey_validate_contexts[] = {
+#define x(n) #n,
+ BKEY_VALIDATE_CONTEXTS()
+#undef x
+ NULL
+};
- if (ret == -BCH_ERR_fsck_fix) {
- set_bit(BCH_FS_errors_fixed, &c->flags);
- } else {
- set_bit(BCH_FS_errors_not_fixed, &c->flags);
- set_bit(BCH_FS_error, &c->flags);
+int __bch2_bkey_fsck_err(struct bch_fs *c,
+ struct bkey_s_c k,
+ struct bkey_validate_context from,
+ enum bch_sb_error_id err,
+ const char *fmt, ...)
+{
+ if (from.flags & BCH_VALIDATE_silent)
+ return -BCH_ERR_fsck_delete_bkey;
+
+ unsigned fsck_flags = 0;
+ if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) {
+ if (test_bit(err, c->sb.errors_silent))
+ return -BCH_ERR_fsck_delete_bkey;
+
+ fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX;
}
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ fsck_flags |= fsck_flags_extra[err];
+
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "invalid bkey in %s",
+ bch2_bkey_validate_contexts[from.from]);
+
+ if (from.from == BKEY_VALIDATE_journal)
+ prt_printf(&buf, " journal seq=%llu offset=%u",
+ from.journal_seq, from.journal_offset);
+
+ prt_str(&buf, " btree=");
+ bch2_btree_id_to_text(&buf, from.btree);
+ prt_printf(&buf, " level=%u: ", from.level);
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\n ");
+ va_list args;
+ va_start(args, fmt);
+ prt_vprintf(&buf, fmt, args);
+ va_end(args);
+
+ prt_str(&buf, ": delete?");
+
+ int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf);
+ printbuf_exit(&buf);
return ret;
}
@@ -335,3 +529,36 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
mutex_unlock(&c->fsck_error_msgs_lock);
}
+
+int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum)
+{
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ /* XXX: we don't yet attempt to print paths when we don't know the subvol */
+ if (inum.subvol)
+ ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out));
+ if (!inum.subvol || ret)
+ prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum);
+
+ return trans_was_restarted(trans, restart_count);
+}
+
+int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
+ subvol_inum inum, u64 offset)
+{
+ int ret = bch2_inum_err_msg_trans(trans, out, inum);
+ prt_printf(out, " offset %llu: ", offset);
+ return ret;
+}
+
+void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum)
+{
+ bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum));
+}
+
+void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
+ subvol_inum inum, u64 offset)
+{
+ bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
+}