summaryrefslogtreecommitdiff
path: root/fs/ceph/mds_client.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--fs/ceph/mds_client.c354
1 files changed, 319 insertions, 35 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 360b686c3c67..230e0c3f341f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -827,7 +827,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
* And the worst case is that for the none async openc request it will
* successfully open the file if the CDentry hasn't been unlinked yet,
* but later the previous delayed async unlink request will remove the
- * CDenty. That means the just created file is possiblly deleted later
+ * CDentry. That means the just created file is possibly deleted later
* by accident.
*
* We need to wait for the inflight async unlink requests to finish
@@ -1747,14 +1747,6 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
}
}
-void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
-{
- mutex_lock(&mdsc->mutex);
- __open_export_target_sessions(mdsc, session);
- mutex_unlock(&mdsc->mutex);
-}
-
/*
* session caps
*/
@@ -2266,7 +2258,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
trim_caps - remaining);
}
- ceph_flush_cap_releases(mdsc, session);
+ ceph_flush_session_cap_releases(mdsc, session);
return 0;
}
@@ -2362,7 +2354,7 @@ again:
item->ino = cpu_to_le64(cap->cap_ino);
item->cap_id = cpu_to_le64(cap->cap_id);
item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
+ item->issue_seq = cpu_to_le32(cap->issue_seq);
msg->front.iov_len += sizeof(*item);
ceph_put_cap(mdsc, cap);
@@ -2420,7 +2412,7 @@ static void ceph_cap_release_work(struct work_struct *work)
ceph_put_mds_session(session);
}
-void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
+void ceph_flush_session_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session)
{
struct ceph_client *cl = mdsc->fsc->client;
@@ -2447,7 +2439,7 @@ void __ceph_queue_cap_release(struct ceph_mds_session *session,
session->s_num_cap_releases++;
if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE))
- ceph_flush_cap_releases(session->s_mdsc, session);
+ ceph_flush_session_cap_releases(session->s_mdsc, session);
}
static void ceph_cap_reclaim_work(struct work_struct *work)
@@ -2629,6 +2621,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
{
struct inode *dir = req->r_parent;
struct dentry *dentry = req->r_dentry;
+ const struct qstr *name = req->r_dname;
u8 *cryptbuf = NULL;
u32 len = 0;
int ret = 0;
@@ -2649,8 +2642,10 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
if (!fscrypt_has_encryption_key(dir))
goto success;
- if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX,
- &len)) {
+ if (!name)
+ name = &dentry->d_name;
+
+ if (!fscrypt_fname_encrypted_size(dir, name->len, NAME_MAX, &len)) {
WARN_ON_ONCE(1);
return ERR_PTR(-ENAMETOOLONG);
}
@@ -2665,7 +2660,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
if (!cryptbuf)
return ERR_PTR(-ENOMEM);
- ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len);
+ ret = fscrypt_fname_encrypt(dir, name, cryptbuf, len);
if (ret) {
kfree(cryptbuf);
return ERR_PTR(ret);
@@ -2808,12 +2803,11 @@ retry:
if (pos < 0) {
/*
- * A rename didn't occur, but somehow we didn't end up where
- * we thought we would. Throw a warning and try again.
+ * The path is longer than PATH_MAX and this function
+ * cannot ever succeed. Creating paths that long is
+ * possible with Ceph, but Linux cannot use them.
*/
- pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n",
- pos);
- goto retry;
+ return ERR_PTR(-ENAMETOOLONG);
}
*pbase = base;
@@ -2954,12 +2948,12 @@ static struct ceph_mds_request_head_legacy *
find_legacy_request_head(void *p, u64 features)
{
bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
- struct ceph_mds_request_head_old *ohead;
+ struct ceph_mds_request_head *head;
if (legacy)
return (struct ceph_mds_request_head_legacy *)p;
- ohead = (struct ceph_mds_request_head_old *)p;
- return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid;
+ head = (struct ceph_mds_request_head *)p;
+ return (struct ceph_mds_request_head_legacy *)&head->oldest_client_tid;
}
/*
@@ -3029,7 +3023,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
if (legacy)
len = sizeof(struct ceph_mds_request_head_legacy);
else if (request_head_version == 1)
- len = sizeof(struct ceph_mds_request_head_old);
+ len = offsetofend(struct ceph_mds_request_head, args);
else if (request_head_version == 2)
len = offsetofend(struct ceph_mds_request_head, ext_num_fwd);
else
@@ -3113,11 +3107,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
msg->hdr.version = cpu_to_le16(3);
p = msg->front.iov_base + sizeof(*lhead);
} else if (request_head_version == 1) {
- struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
+ struct ceph_mds_request_head *nhead = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(4);
- ohead->version = cpu_to_le16(1);
- p = msg->front.iov_base + sizeof(*ohead);
+ nhead->version = cpu_to_le16(1);
+ p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, args);
} else if (request_head_version == 2) {
struct ceph_mds_request_head *nhead = msg->front.iov_base;
@@ -3269,12 +3263,12 @@ static int __prepare_send_request(struct ceph_mds_session *session,
&session->s_features);
/*
- * Avoid inifinite retrying after overflow. The client will
+ * Avoid infinite retrying after overflow. The client will
* increase the retry count and if the MDS is old version,
* so we limit to retry at most 256 times.
*/
if (req->r_attempts) {
- old_max_retry = sizeof_field(struct ceph_mds_request_head_old,
+ old_max_retry = sizeof_field(struct ceph_mds_request_head,
num_retry);
old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE);
if ((old_version && req->r_attempts >= old_max_retry) ||
@@ -3522,7 +3516,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
/*
* For async create we will choose the auth MDS of frag in parent
- * directory to send the request and ususally this works fine, but
+ * directory to send the request and usually this works fine, but
* if the migrated the dirtory to another MDS before it could handle
* it the request will be forwarded.
*
@@ -4033,7 +4027,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
__unregister_request(mdsc, req);
} else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) {
/*
- * Avoid inifinite retrying after overflow.
+ * Avoid infinite retrying after overflow.
*
* The MDS will increase the fwd count and in client side
* if the num_fwd is less than the one saved in request
@@ -4112,10 +4106,13 @@ static void handle_session(struct ceph_mds_session *session,
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
struct ceph_mds_session_head *h;
- u32 op;
+ struct ceph_mds_cap_auth *cap_auths = NULL;
+ u32 op, cap_auths_num = 0;
u64 seq, features = 0;
int wake = 0;
bool blocklisted = false;
+ u32 i;
+
/* decode */
ceph_decode_need(&p, end, sizeof(*h), bad);
@@ -4160,7 +4157,101 @@ static void handle_session(struct ceph_mds_session *session,
}
}
+ if (msg_version >= 6) {
+ ceph_decode_32_safe(&p, end, cap_auths_num, bad);
+ doutc(cl, "cap_auths_num %d\n", cap_auths_num);
+
+ if (cap_auths_num && op != CEPH_SESSION_OPEN) {
+ WARN_ON_ONCE(op != CEPH_SESSION_OPEN);
+ goto skip_cap_auths;
+ }
+
+ cap_auths = kcalloc(cap_auths_num,
+ sizeof(struct ceph_mds_cap_auth),
+ GFP_KERNEL);
+ if (!cap_auths) {
+ pr_err_client(cl, "No memory for cap_auths\n");
+ return;
+ }
+
+ for (i = 0; i < cap_auths_num; i++) {
+ u32 _len, j;
+
+ /* struct_v, struct_compat, and struct_len in MDSCapAuth */
+ ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
+
+ /* struct_v, struct_compat, and struct_len in MDSCapMatch */
+ ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
+ ceph_decode_64_safe(&p, end, cap_auths[i].match.uid, bad);
+ ceph_decode_32_safe(&p, end, _len, bad);
+ if (_len) {
+ cap_auths[i].match.gids = kcalloc(_len, sizeof(u32),
+ GFP_KERNEL);
+ if (!cap_auths[i].match.gids) {
+ pr_err_client(cl, "No memory for gids\n");
+ goto fail;
+ }
+
+ cap_auths[i].match.num_gids = _len;
+ for (j = 0; j < _len; j++)
+ ceph_decode_32_safe(&p, end,
+ cap_auths[i].match.gids[j],
+ bad);
+ }
+
+ ceph_decode_32_safe(&p, end, _len, bad);
+ if (_len) {
+ cap_auths[i].match.path = kcalloc(_len + 1, sizeof(char),
+ GFP_KERNEL);
+ if (!cap_auths[i].match.path) {
+ pr_err_client(cl, "No memory for path\n");
+ goto fail;
+ }
+ ceph_decode_copy(&p, cap_auths[i].match.path, _len);
+
+ /* Remove the tailing '/' */
+ while (_len && cap_auths[i].match.path[_len - 1] == '/') {
+ cap_auths[i].match.path[_len - 1] = '\0';
+ _len -= 1;
+ }
+ }
+
+ ceph_decode_32_safe(&p, end, _len, bad);
+ if (_len) {
+ cap_auths[i].match.fs_name = kcalloc(_len + 1, sizeof(char),
+ GFP_KERNEL);
+ if (!cap_auths[i].match.fs_name) {
+ pr_err_client(cl, "No memory for fs_name\n");
+ goto fail;
+ }
+ ceph_decode_copy(&p, cap_auths[i].match.fs_name, _len);
+ }
+
+ ceph_decode_8_safe(&p, end, cap_auths[i].match.root_squash, bad);
+ ceph_decode_8_safe(&p, end, cap_auths[i].readable, bad);
+ ceph_decode_8_safe(&p, end, cap_auths[i].writeable, bad);
+ doutc(cl, "uid %lld, num_gids %u, path %s, fs_name %s, root_squash %d, readable %d, writeable %d\n",
+ cap_auths[i].match.uid, cap_auths[i].match.num_gids,
+ cap_auths[i].match.path, cap_auths[i].match.fs_name,
+ cap_auths[i].match.root_squash,
+ cap_auths[i].readable, cap_auths[i].writeable);
+ }
+ }
+
+skip_cap_auths:
mutex_lock(&mdsc->mutex);
+ if (op == CEPH_SESSION_OPEN) {
+ if (mdsc->s_cap_auths) {
+ for (i = 0; i < mdsc->s_cap_auths_num; i++) {
+ kfree(mdsc->s_cap_auths[i].match.gids);
+ kfree(mdsc->s_cap_auths[i].match.path);
+ kfree(mdsc->s_cap_auths[i].match.fs_name);
+ }
+ kfree(mdsc->s_cap_auths);
+ }
+ mdsc->s_cap_auths_num = cap_auths_num;
+ mdsc->s_cap_auths = cap_auths;
+ }
if (op == CEPH_SESSION_CLOSE) {
ceph_get_mds_session(session);
__unregister_session(mdsc, session);
@@ -4243,7 +4334,7 @@ static void handle_session(struct ceph_mds_session *session,
/* flush cap releases */
spin_lock(&session->s_cap_lock);
if (session->s_num_cap_releases)
- ceph_flush_cap_releases(mdsc, session);
+ ceph_flush_session_cap_releases(mdsc, session);
spin_unlock(&session->s_cap_lock);
send_flushmsg_ack(mdsc, session, seq);
@@ -4290,6 +4381,13 @@ bad:
pr_err_client(cl, "corrupt message mds%d len %d\n", mds,
(int)msg->front.iov_len);
ceph_msg_dump(msg);
+fail:
+ for (i = 0; i < cap_auths_num; i++) {
+ kfree(cap_auths[i].match.gids);
+ kfree(cap_auths[i].match.path);
+ kfree(cap_auths[i].match.fs_name);
+ }
+ kfree(cap_auths);
return;
}
@@ -4806,7 +4904,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
} else {
recon_state.msg_version = 2;
}
- /* trsaverse this session's caps */
+ /* traverse this session's caps */
err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state);
spin_lock(&session->s_cap_lock);
@@ -5342,6 +5440,8 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
+ ceph_flush_session_cap_releases(mdsc, s);
+
mutex_lock(&s->s_mutex);
if (renew_caps)
send_renew_caps(mdsc, s);
@@ -5389,6 +5489,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
spin_lock_init(&mdsc->stopping_lock);
atomic_set(&mdsc->stopping_blockers, 0);
init_completion(&mdsc->stopping_waiter);
+ atomic64_set(&mdsc->dirty_folios, 0);
+ init_waitqueue_head(&mdsc->flush_end_wq);
init_waitqueue_head(&mdsc->session_close_wq);
INIT_LIST_HEAD(&mdsc->waiting_for_map);
mdsc->quotarealms_inodes = RB_ROOT;
@@ -5401,7 +5503,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
mdsc->last_renew_caps = jiffies;
INIT_LIST_HEAD(&mdsc->cap_delay_list);
+#ifdef CONFIG_DEBUG_FS
INIT_LIST_HEAD(&mdsc->cap_wait_list);
+#endif
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
@@ -5499,6 +5603,173 @@ void send_flush_mdlog(struct ceph_mds_session *s)
mutex_unlock(&s->s_mutex);
}
+static int ceph_mds_auth_match(struct ceph_mds_client *mdsc,
+ struct ceph_mds_cap_auth *auth,
+ const struct cred *cred,
+ char *tpath)
+{
+ u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
+ u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
+ struct ceph_client *cl = mdsc->fsc->client;
+ const char *spath = mdsc->fsc->mount_options->server_path;
+ bool gid_matched = false;
+ u32 gid, tlen, len;
+ int i, j;
+
+ doutc(cl, "match.uid %lld\n", auth->match.uid);
+ if (auth->match.uid != MDS_AUTH_UID_ANY) {
+ if (auth->match.uid != caller_uid)
+ return 0;
+ if (auth->match.num_gids) {
+ for (i = 0; i < auth->match.num_gids; i++) {
+ if (caller_gid == auth->match.gids[i])
+ gid_matched = true;
+ }
+ if (!gid_matched && cred->group_info->ngroups) {
+ for (i = 0; i < cred->group_info->ngroups; i++) {
+ gid = from_kgid(&init_user_ns,
+ cred->group_info->gid[i]);
+ for (j = 0; j < auth->match.num_gids; j++) {
+ if (gid == auth->match.gids[j]) {
+ gid_matched = true;
+ break;
+ }
+ }
+ if (gid_matched)
+ break;
+ }
+ }
+ if (!gid_matched)
+ return 0;
+ }
+ }
+
+ /* path match */
+ if (auth->match.path) {
+ if (!tpath)
+ return 0;
+
+ tlen = strlen(tpath);
+ len = strlen(auth->match.path);
+ if (len) {
+ char *_tpath = tpath;
+ bool free_tpath = false;
+ int m, n;
+
+ doutc(cl, "server path %s, tpath %s, match.path %s\n",
+ spath, tpath, auth->match.path);
+ if (spath && (m = strlen(spath)) != 1) {
+ /* mount path + '/' + tpath + an extra space */
+ n = m + 1 + tlen + 1;
+ _tpath = kmalloc(n, GFP_NOFS);
+ if (!_tpath)
+ return -ENOMEM;
+ /* remove the leading '/' */
+ snprintf(_tpath, n, "%s/%s", spath + 1, tpath);
+ free_tpath = true;
+ tlen = strlen(_tpath);
+ }
+
+ /*
+ * Please note the tailing '/' for match.path has already
+ * been removed when parsing.
+ *
+ * Remove the tailing '/' for the target path.
+ */
+ while (tlen && _tpath[tlen - 1] == '/') {
+ _tpath[tlen - 1] = '\0';
+ tlen -= 1;
+ }
+ doutc(cl, "_tpath %s\n", _tpath);
+
+ /*
+ * In case first == _tpath && tlen == len:
+ * match.path=/foo --> /foo _path=/foo --> match
+ * match.path=/foo/ --> /foo _path=/foo --> match
+ *
+ * In case first == _tmatch.path && tlen > len:
+ * match.path=/foo/ --> /foo _path=/foo/ --> match
+ * match.path=/foo --> /foo _path=/foo/ --> match
+ * match.path=/foo/ --> /foo _path=/foo/d --> match
+ * match.path=/foo --> /foo _path=/food --> mismatch
+ *
+ * All the other cases --> mismatch
+ */
+ bool path_matched = true;
+ char *first = strstr(_tpath, auth->match.path);
+ if (first != _tpath ||
+ (tlen > len && _tpath[len] != '/')) {
+ path_matched = false;
+ }
+
+ if (free_tpath)
+ kfree(_tpath);
+
+ if (!path_matched)
+ return 0;
+ }
+ }
+
+ doutc(cl, "matched\n");
+ return 1;
+}
+
+int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, int mask)
+{
+ const struct cred *cred = get_current_cred();
+ u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
+ u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
+ struct ceph_mds_cap_auth *rw_perms_s = NULL;
+ struct ceph_client *cl = mdsc->fsc->client;
+ bool root_squash_perms = true;
+ int i, err;
+
+ doutc(cl, "tpath '%s', mask %d, caller_uid %d, caller_gid %d\n",
+ tpath, mask, caller_uid, caller_gid);
+
+ for (i = 0; i < mdsc->s_cap_auths_num; i++) {
+ struct ceph_mds_cap_auth *s = &mdsc->s_cap_auths[i];
+
+ err = ceph_mds_auth_match(mdsc, s, cred, tpath);
+ if (err < 0) {
+ put_cred(cred);
+ return err;
+ } else if (err > 0) {
+ /* always follow the last auth caps' permission */
+ root_squash_perms = true;
+ rw_perms_s = NULL;
+ if ((mask & MAY_WRITE) && s->writeable &&
+ s->match.root_squash && (!caller_uid || !caller_gid))
+ root_squash_perms = false;
+
+ if (((mask & MAY_WRITE) && !s->writeable) ||
+ ((mask & MAY_READ) && !s->readable))
+ rw_perms_s = s;
+ }
+ }
+
+ put_cred(cred);
+
+ doutc(cl, "root_squash_perms %d, rw_perms_s %p\n", root_squash_perms,
+ rw_perms_s);
+ if (root_squash_perms && rw_perms_s == NULL) {
+ doutc(cl, "access allowed\n");
+ return 0;
+ }
+
+ if (!root_squash_perms) {
+ doutc(cl, "root_squash is enabled and user(%d %d) isn't allowed to write",
+ caller_uid, caller_gid);
+ }
+ if (rw_perms_s) {
+ doutc(cl, "mds auth caps readable/writeable %d/%d while request r/w %d/%d",
+ rw_perms_s->readable, rw_perms_s->writeable,
+ !!(mask & MAY_READ), !!(mask & MAY_WRITE));
+ }
+ doutc(cl, "access denied\n");
+ return -EACCES;
+}
+
/*
* called before mount is ro, and before dentries are torn down.
* (hmm, does this still race with new lookups?)
@@ -5605,6 +5876,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
mutex_unlock(&mdsc->mutex);
ceph_flush_dirty_caps(mdsc);
+ ceph_flush_cap_releases(mdsc);
spin_lock(&mdsc->cap_dirty_lock);
want_flush = mdsc->last_cap_flush_tid;
if (!list_empty(&mdsc->cap_flush_list)) {
@@ -5743,6 +6015,18 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
ceph_caps_finalize(mdsc);
+
+ if (mdsc->s_cap_auths) {
+ int i;
+
+ for (i = 0; i < mdsc->s_cap_auths_num; i++) {
+ kfree(mdsc->s_cap_auths[i].match.gids);
+ kfree(mdsc->s_cap_auths[i].match.path);
+ kfree(mdsc->s_cap_auths[i].match.fs_name);
+ }
+ kfree(mdsc->s_cap_auths);
+ }
+
ceph_pool_perm_destroy(mdsc);
}