summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/caps.c6
-rw-r--r--fs/ceph/mds_client.c12
-rw-r--r--fs/ceph/mds_client.h11
-rw-r--r--fs/ceph/quota.c14
-rw-r--r--fs/ceph/snap.c10
-rw-r--r--fs/ceph/super.c75
-rw-r--r--fs/ceph/super.h3
7 files changed, 109 insertions, 22 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5c2b28ac9410..54041d9c1e25 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4247,6 +4247,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
dout("handle_caps from mds%d\n", session->s_mds);
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
end = msg->front.iov_base + msg->front.iov_len;
if (msg->front.iov_len < sizeof(*h))
@@ -4348,7 +4351,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
(unsigned)seq);
@@ -4457,6 +4459,8 @@ done:
done_unlocked:
iput(inode);
out:
+ ceph_dec_mds_stopping_blocker(mdsc);
+
ceph_put_string(extra_info.pool_ns);
/* Defer closing the sessions after s_mutex lock being released */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c257d75c5757..04a881343e43 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4889,6 +4889,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
dout("handle_lease from mds%d\n", mds);
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
goto bad;
@@ -4907,8 +4910,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
dname.len, dname.name);
mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
-
if (!inode) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -4970,9 +4971,13 @@ release:
out:
mutex_unlock(&session->s_mutex);
iput(inode);
+
+ ceph_dec_mds_stopping_blocker(mdsc);
return;
bad:
+ ceph_dec_mds_stopping_blocker(mdsc);
+
pr_err("corrupt lease message\n");
ceph_msg_dump(msg);
}
@@ -5168,6 +5173,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
}
init_completion(&mdsc->safe_umount_waiters);
+ spin_lock_init(&mdsc->stopping_lock);
+ atomic_set(&mdsc->stopping_blockers, 0);
+ init_completion(&mdsc->stopping_waiter);
init_waitqueue_head(&mdsc->session_close_wq);
INIT_LIST_HEAD(&mdsc->waiting_for_map);
mdsc->quotarealms_inodes = RB_ROOT;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 0477388a0d1c..1fa0f78b7b79 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -399,8 +399,9 @@ struct cap_wait {
};
enum {
- CEPH_MDSC_STOPPING_BEGIN = 1,
- CEPH_MDSC_STOPPING_FLUSHED = 2,
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHING = 2,
+ CEPH_MDSC_STOPPING_FLUSHED = 3,
};
/*
@@ -419,7 +420,11 @@ struct ceph_mds_client {
struct ceph_mds_session **sessions; /* NULL for mds if no session */
atomic_t num_sessions;
int max_sessions; /* len of sessions array */
- int stopping; /* true if shutting down */
+
+ spinlock_t stopping_lock; /* protect snap_empty */
+ int stopping; /* the stage of shutting down */
+ atomic_t stopping_blockers;
+ struct completion stopping_waiter;
atomic64_t quotarealms_count; /* # realms with quota */
/*
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 64592adfe48f..f7fcf7f08ec6 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -47,25 +47,23 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct inode *inode;
struct ceph_inode_info *ci;
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
if (msg->front.iov_len < sizeof(*h)) {
pr_err("%s corrupt message mds%d len %d\n", __func__,
session->s_mds, (int)msg->front.iov_len);
ceph_msg_dump(msg);
- return;
+ goto out;
}
- /* increment msg sequence number */
- mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
- mutex_unlock(&session->s_mutex);
-
/* lookup inode */
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
inode = ceph_find_inode(sb, vino);
if (!inode) {
pr_warn("Failed to find inode %llu\n", vino.ino);
- return;
+ goto out;
}
ci = ceph_inode(inode);
@@ -78,6 +76,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
spin_unlock(&ci->i_ceph_lock);
iput(inode);
+out:
+ ceph_dec_mds_stopping_blocker(mdsc);
}
static struct ceph_quotarealm_inode *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 343d738448dc..7ddc6bad77ef 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1015,6 +1015,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
int locked_rwsem = 0;
bool close_sessions = false;
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
if (msg->front.iov_len < sizeof(*h))
goto bad;
@@ -1030,10 +1033,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
dout("%s from mds%d op %s split %llx tracelen %d\n", __func__,
mds, ceph_snap_op_name(op), split, trace_len);
- mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
- mutex_unlock(&session->s_mutex);
-
down_write(&mdsc->snap_rwsem);
locked_rwsem = 1;
@@ -1151,6 +1150,7 @@ skip_inode:
up_write(&mdsc->snap_rwsem);
flush_snaps(mdsc);
+ ceph_dec_mds_stopping_blocker(mdsc);
return;
bad:
@@ -1160,6 +1160,8 @@ out:
if (locked_rwsem)
up_write(&mdsc->snap_rwsem);
+ ceph_dec_mds_stopping_blocker(mdsc);
+
if (close_sessions)
ceph_mdsc_close_sessions(mdsc);
return;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75dd1b6b3d01..1c14d87ed871 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1462,25 +1462,90 @@ nomem:
return -ENOMEM;
}
+/*
+ * Return true if it successfully increases the blocker counter,
+ * or false if the mdsc is in stopping and flushed state.
+ */
+static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ spin_lock(&mdsc->stopping_lock);
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
+ spin_unlock(&mdsc->stopping_lock);
+ return false;
+ }
+ atomic_inc(&mdsc->stopping_blockers);
+ spin_unlock(&mdsc->stopping_lock);
+ return true;
+}
+
+static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ spin_lock(&mdsc->stopping_lock);
+ if (!atomic_dec_return(&mdsc->stopping_blockers) &&
+ mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
+ complete_all(&mdsc->stopping_waiter);
+ spin_unlock(&mdsc->stopping_lock);
+}
+
+/* For metadata IO requests */
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ mutex_lock(&session->s_mutex);
+ inc_session_sequence(session);
+ mutex_unlock(&session->s_mutex);
+
+ return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ __dec_stopping_blocker(mdsc);
+}
+
static void ceph_kill_sb(struct super_block *s)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ bool wait;
dout("kill_sb %p\n", s);
- ceph_mdsc_pre_umount(fsc->mdsc);
+ ceph_mdsc_pre_umount(mdsc);
flush_fs_workqueues(fsc);
/*
* Though the kill_anon_super() will finally trigger the
- * sync_filesystem() anyway, we still need to do it here
- * and then bump the stage of shutdown to stop the work
- * queue as earlier as possible.
+ * sync_filesystem() anyway, we still need to do it here and
+ * then bump the stage of shutdown. This will allow us to
+ * drop any further message, which will increase the inodes'
+ * i_count reference counters but makes no sense any more,
+ * from MDSs.
+ *
+ * Without this when evicting the inodes it may fail in the
+ * kill_anon_super(), which will trigger a warning when
+ * destroying the fscrypt keyring and then possibly trigger
+ * a further crash in ceph module when the iput() tries to
+ * evict the inodes later.
*/
sync_filesystem(s);
- fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+ spin_lock(&mdsc->stopping_lock);
+ mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
+ wait = !!atomic_read(&mdsc->stopping_blockers);
+ spin_unlock(&mdsc->stopping_lock);
+
+ if (wait && atomic_read(&mdsc->stopping_blockers)) {
+ long timeleft = wait_for_completion_killable_timeout(
+ &mdsc->stopping_waiter,
+ fsc->client->options->mount_timeout);
+ if (!timeleft) /* timed out */
+ pr_warn("umount timed out, %ld\n", timeleft);
+ else if (timeleft < 0) /* killed */
+ pr_warn("umount was killed, %ld\n", timeleft);
+ }
+ mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b5e54c8f010b..d8eb35d73a23 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1413,4 +1413,7 @@ extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */