summaryrefslogtreecommitdiff
path: root/fs/ceph/snap.c
diff options
context:
space:
mode:
authorXiubo Li <xiubli@redhat.com>2023-02-01 09:36:45 +0800
committerIlya Dryomov <idryomov@gmail.com>2023-02-02 13:58:15 +0100
commita68e564adcaa69b0930809fb64d9d5f7d9c32ba9 (patch)
tree061fa00fa614c4a83aec06c620905dfe2a0eff5e /fs/ceph/snap.c
parentb38b17b6a01ca4e738af097a1529910646ef4270 (diff)
ceph: blocklist the kclient when receiving corrupted snap trace
When received corrupted snap trace we don't know what exactly has happened in MDS side. And we shouldn't continue IOs and metadatas access to MDS, which may corrupt or get incorrect contents. This patch will just block all the further IO/MDS requests immediately and then evict the kclient itself. The reason why we still need to evict the kclient just after blocking all the further IOs is that the MDS could revoke the caps faster. Link: https://tracker.ceph.com/issues/57686 Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Venky Shankar <vshankar@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/snap.c')
-rw-r--r--fs/ceph/snap.c36
1 files changed, 34 insertions, 2 deletions
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e4151852184e..87007203f130 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
+#include <linux/fs.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/iversion.h>
@@ -766,8 +767,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm;
struct ceph_snap_realm *first_realm = NULL;
struct ceph_snap_realm *realm_to_rebuild = NULL;
+ struct ceph_client *client = mdsc->fsc->client;
int rebuild_snapcs;
int err = -ENOMEM;
+ int ret;
LIST_HEAD(dirty_realms);
lockdep_assert_held_write(&mdsc->snap_rwsem);
@@ -884,6 +887,27 @@ fail:
if (first_realm)
ceph_put_snap_realm(mdsc, first_realm);
pr_err("%s error %d\n", __func__, err);
+
+ /*
+ * When receiving a corrupted snap trace we don't know what
+ * exactly has happened in MDS side. And we shouldn't continue
+ * writing to OSD, which may corrupt the snapshot contents.
+ *
+ * Just try to blocklist this kclient and then this kclient
+ * must be remounted to continue after the corrupted metadata
+ * fixed in the MDS side.
+ */
+ WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO);
+ ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr);
+ if (ret)
+ pr_err("%s failed to blocklist %s: %d\n", __func__,
+ ceph_pr_addr(&client->msgr.inst.addr), ret);
+
+ WARN(1, "%s: %s%sdo remount to continue%s",
+ __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr),
+ ret ? "" : " was blocklisted, ",
+ err == -EIO ? " after corrupted snaptrace is fixed" : "");
+
return err;
}
@@ -984,6 +1008,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
__le64 *split_inos = NULL, *split_realms = NULL;
int i;
int locked_rwsem = 0;
+ bool close_sessions = false;
/* decode */
if (msg->front.iov_len < sizeof(*h))
@@ -1092,8 +1117,12 @@ skip_inode:
* update using the provided snap trace. if we are deleting a
* snap, we can avoid queueing cap_snaps.
*/
- ceph_update_snap_trace(mdsc, p, e,
- op == CEPH_SNAP_OP_DESTROY, NULL);
+ if (ceph_update_snap_trace(mdsc, p, e,
+ op == CEPH_SNAP_OP_DESTROY,
+ NULL)) {
+ close_sessions = true;
+ goto bad;
+ }
if (op == CEPH_SNAP_OP_SPLIT)
/* we took a reference when we created the realm, above */
@@ -1112,6 +1141,9 @@ bad:
out:
if (locked_rwsem)
up_write(&mdsc->snap_rwsem);
+
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
return;
}