summaryrefslogtreecommitdiff
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--fs/ceph/caps.c133
1 files changed, 80 insertions, 53 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c4941ba245ac..a8d8b56cf9d2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -10,6 +10,7 @@
#include <linux/writeback.h>
#include <linux/iversion.h>
#include <linux/filelock.h>
+#include <linux/jiffies.h>
#include "super.h"
#include "mds_client.h"
@@ -977,20 +978,6 @@ int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
return 0;
}
-int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
-{
- struct inode *inode = &ci->netfs.inode;
- struct ceph_client *cl = ceph_inode_to_client(inode);
- int ret;
-
- spin_lock(&ci->i_ceph_lock);
- ret = __ceph_caps_revoking_other(ci, NULL, mask);
- spin_unlock(&ci->i_ceph_lock);
- doutc(cl, "%p %llx.%llx %s = %d\n", inode, ceph_vinop(inode),
- ceph_cap_string(mask), ret);
- return ret;
-}
-
int __ceph_caps_used(struct ceph_inode_info *ci)
{
int used = 0;
@@ -2016,6 +2003,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
* CHECK_CAPS_AUTHONLY - we should only check the auth cap
* CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
* further delay.
+ * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
+ * further delay.
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags)
{
@@ -2097,7 +2086,7 @@ retry:
}
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
- "flushing %s issued %s revoking %s retain %s %s%s%s\n",
+ "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
ceph_cap_string(ci->i_flushing_caps),
@@ -2105,7 +2094,8 @@ retry:
ceph_cap_string(retain),
(flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
(flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
- (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
+ (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
+ (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
/*
* If we no longer need to hold onto old our caps, and we may
@@ -2180,6 +2170,11 @@ retry:
queue_writeback = true;
}
+ if (flags & CHECK_CAPS_FLUSH_FORCE) {
+ doutc(cl, "force to flush caps\n");
+ goto ack;
+ }
+
if (cap == ci->i_auth_cap &&
(cap->issued & CEPH_CAP_FILE_WR)) {
/* request larger max_size from MDS? */
@@ -2804,7 +2799,7 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
* requested from the MDS.
*
* Returns 0 if caps were not able to be acquired (yet), 1 if succeed,
- * or a negative error code. There are 3 speical error codes:
+ * or a negative error code. There are 3 special error codes:
* -EAGAIN: need to sleep but non-blocking is specified
* -EFBIG: ask caller to call check_max_size() and try again.
* -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
@@ -3067,10 +3062,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
flags, &_got);
WARN_ON_ONCE(ret == -EAGAIN);
if (!ret) {
+#ifdef CONFIG_DEBUG_FS
struct ceph_mds_client *mdsc = fsc->mdsc;
struct cap_wait cw;
+#endif
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+#ifdef CONFIG_DEBUG_FS
cw.ino = ceph_ino(inode);
cw.tgid = current->tgid;
cw.need = need;
@@ -3079,6 +3077,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
spin_lock(&mdsc->caps_list_lock);
list_add(&cw.list, &mdsc->cap_wait_list);
spin_unlock(&mdsc->caps_list_lock);
+#endif
/* make sure used fmode not timeout */
ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
@@ -3097,9 +3096,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
remove_wait_queue(&ci->i_cap_wq, &wait);
ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
+#ifdef CONFIG_DEBUG_FS
spin_lock(&mdsc->caps_list_lock);
list_del(&cw.list);
spin_unlock(&mdsc->caps_list_lock);
+#endif
if (ret == -EAGAIN)
continue;
@@ -3504,6 +3505,8 @@ static void handle_cap_grant(struct inode *inode,
bool queue_invalidate = false;
bool deleted_inode = false;
bool fill_inline = false;
+ bool revoke_wait = false;
+ int flags = 0;
/*
* If there is at least one crypto block then we'll trust
@@ -3699,16 +3702,18 @@ static void handle_cap_grant(struct inode *inode,
ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
ceph_cap_string(revoking));
if (S_ISREG(inode->i_mode) &&
- (revoking & used & CEPH_CAP_FILE_BUFFER))
+ (revoking & used & CEPH_CAP_FILE_BUFFER)) {
writeback = true; /* initiate writeback; will delay ack */
- else if (queue_invalidate &&
+ revoke_wait = true;
+ } else if (queue_invalidate &&
revoking == CEPH_CAP_FILE_CACHE &&
- (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
- ; /* do nothing yet, invalidation will be queued */
- else if (cap == ci->i_auth_cap)
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
+ revoke_wait = true; /* do nothing yet, invalidation will be queued */
+ } else if (cap == ci->i_auth_cap) {
check_caps = 1; /* check auth cap only */
- else
+ } else {
check_caps = 2; /* check all caps */
+ }
/* If there is new caps, try to wake up the waiters */
if (~cap->issued & newcaps)
wake = true;
@@ -3735,8 +3740,9 @@ static void handle_cap_grant(struct inode *inode,
BUG_ON(cap->issued & ~cap->implemented);
/* don't let check_caps skip sending a response to MDS for revoke msgs */
- if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+ if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
cap->mds_wanted = 0;
+ flags |= CHECK_CAPS_FLUSH_FORCE;
if (cap == ci->i_auth_cap)
check_caps = 1; /* check auth cap only */
else
@@ -3792,9 +3798,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex);
if (check_caps == 1)
- ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
else if (check_caps == 2)
- ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
}
/*
@@ -4065,23 +4071,22 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
struct ceph_cap *cap, *tcap, *new_cap = NULL;
struct ceph_inode_info *ci = ceph_inode(inode);
u64 t_cap_id;
- unsigned mseq = le32_to_cpu(ex->migrate_seq);
- unsigned t_seq, t_mseq;
+ u32 t_issue_seq, t_mseq;
int target, issued;
int mds = session->s_mds;
if (ph) {
t_cap_id = le64_to_cpu(ph->cap_id);
- t_seq = le32_to_cpu(ph->seq);
+ t_issue_seq = le32_to_cpu(ph->issue_seq);
t_mseq = le32_to_cpu(ph->mseq);
target = le32_to_cpu(ph->mds);
} else {
- t_cap_id = t_seq = t_mseq = 0;
+ t_cap_id = t_issue_seq = t_mseq = 0;
target = -1;
}
- doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d target %d\n",
- inode, ceph_vinop(inode), ci, mds, mseq, target);
+ doutc(cl, " cap %llx.%llx export to peer %d piseq %u pmseq %u\n",
+ ceph_vinop(inode), target, t_issue_seq, t_mseq);
retry:
down_read(&mdsc->snap_rwsem);
spin_lock(&ci->i_ceph_lock);
@@ -4114,12 +4119,12 @@ retry:
if (tcap) {
/* already have caps from the target */
if (tcap->cap_id == t_cap_id &&
- ceph_seq_cmp(tcap->seq, t_seq) < 0) {
+ ceph_seq_cmp(tcap->seq, t_issue_seq) < 0) {
doutc(cl, " updating import cap %p mds%d\n", tcap,
target);
tcap->cap_id = t_cap_id;
- tcap->seq = t_seq - 1;
- tcap->issue_seq = t_seq - 1;
+ tcap->seq = t_issue_seq - 1;
+ tcap->issue_seq = t_issue_seq - 1;
tcap->issued |= issued;
tcap->implemented |= issued;
if (cap == ci->i_auth_cap) {
@@ -4130,11 +4135,11 @@ retry:
ceph_remove_cap(mdsc, cap, false);
goto out_unlock;
} else if (tsession) {
- /* add placeholder for the export tagert */
+ /* add placeholder for the export target */
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
tcap = new_cap;
ceph_add_cap(inode, tsession, t_cap_id, issued, 0,
- t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+ t_issue_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
if (!list_empty(&ci->i_cap_flush_list) &&
ci->i_auth_cap == tcap) {
@@ -4208,18 +4213,22 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
u64 realmino = le64_to_cpu(im->realm);
u64 cap_id = le64_to_cpu(im->cap_id);
u64 p_cap_id;
+ u32 piseq = 0;
+ u32 pmseq = 0;
int peer;
if (ph) {
p_cap_id = le64_to_cpu(ph->cap_id);
peer = le32_to_cpu(ph->mds);
+ piseq = le32_to_cpu(ph->issue_seq);
+ pmseq = le32_to_cpu(ph->mseq);
} else {
p_cap_id = 0;
peer = -1;
}
- doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d peer %d\n",
- inode, ceph_vinop(inode), ci, mds, mseq, peer);
+ doutc(cl, " cap %llx.%llx import from peer %d piseq %u pmseq %u\n",
+ ceph_vinop(inode), peer, piseq, pmseq);
retry:
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
@@ -4248,15 +4257,13 @@ retry:
doutc(cl, " remove export cap %p mds%d flags %d\n",
ocap, peer, ph->flags);
if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
- (ocap->seq != le32_to_cpu(ph->seq) ||
- ocap->mseq != le32_to_cpu(ph->mseq))) {
+ (ocap->seq != piseq ||
+ ocap->mseq != pmseq)) {
pr_err_ratelimited_client(cl, "mismatched seq/mseq: "
"%p %llx.%llx mds%d seq %d mseq %d"
" importer mds%d has peer seq %d mseq %d\n",
inode, ceph_vinop(inode), peer,
- ocap->seq, ocap->mseq, mds,
- le32_to_cpu(ph->seq),
- le32_to_cpu(ph->mseq));
+ ocap->seq, ocap->mseq, mds, piseq, pmseq);
}
ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
}
@@ -4330,7 +4337,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_snap_realm *realm = NULL;
int op;
int msg_version = le16_to_cpu(msg->hdr.version);
- u32 seq, mseq;
+ u32 seq, mseq, issue_seq;
struct ceph_vino vino;
void *snaptrace;
size_t snaptrace_len;
@@ -4340,8 +4347,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
bool close_sessions = false;
bool do_cap_release = false;
- doutc(cl, "from mds%d\n", session->s_mds);
-
if (!ceph_inc_mds_stopping_blocker(mdsc, session))
return;
@@ -4355,6 +4360,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
seq = le32_to_cpu(h->seq);
mseq = le32_to_cpu(h->migrate_seq);
+ issue_seq = le32_to_cpu(h->issue_seq);
snaptrace = h + 1;
snaptrace_len = le32_to_cpu(h->snap_trace_len);
@@ -4442,12 +4448,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
/* lookup ino */
inode = ceph_find_inode(mdsc->fsc->sb, vino);
- doutc(cl, " op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op),
- vino.ino, vino.snap, inode);
+ doutc(cl, " caps mds%d op %s ino %llx.%llx inode %p seq %u iseq %u mseq %u\n",
+ session->s_mds, ceph_cap_op_name(op), vino.ino, vino.snap, inode,
+ seq, issue_seq, mseq);
mutex_lock(&session->s_mutex);
- doutc(cl, " mds%d seq %lld cap seq %u\n", session->s_mds,
- session->s_seq, (unsigned)seq);
if (!inode) {
doutc(cl, " i don't have ino %llx\n", vino.ino);
@@ -4583,7 +4588,7 @@ flush_cap_releases:
__ceph_queue_cap_release(session, cap);
spin_unlock(&session->s_cap_lock);
}
- ceph_flush_cap_releases(mdsc, session);
+ ceph_flush_session_cap_releases(mdsc, session);
goto done;
bad:
@@ -4640,7 +4645,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
* slowness doesn't block mdsc delayed work,
* preventing send_renew_caps() from running.
*/
- if (jiffies - loop_start >= 5 * HZ)
+ if (time_after_eq(jiffies, loop_start + 5 * HZ))
break;
}
spin_unlock(&mdsc->cap_delay_lock);
@@ -4682,6 +4687,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
}
+/*
+ * Flush all cap releases to the mds
+ */
+static void flush_cap_releases(struct ceph_mds_session *s)
+{
+ struct ceph_mds_client *mdsc = s->s_mdsc;
+ struct ceph_client *cl = mdsc->fsc->client;
+
+ doutc(cl, "begin\n");
+ spin_lock(&s->s_cap_lock);
+ if (s->s_num_cap_releases)
+ ceph_flush_session_cap_releases(mdsc, s);
+ spin_unlock(&s->s_cap_lock);
+ doutc(cl, "done\n");
+
+}
+
+void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
+{
+ ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
+}
+
void __ceph_touch_fmode(struct ceph_inode_info *ci,
struct ceph_mds_client *mdsc, int fmode)
{