summaryrefslogtreecommitdiff
path: root/fs/ceph/super.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-20 13:46:20 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-20 13:46:20 +0200
commit64f29d8856a9e0d1fcdc5344f76e70c364b941cb (patch)
tree7faf56634864329b8b23d9ffdda7fa584fc3e96c /fs/ceph/super.c
parent67ed868d23711e9cec9e8adb7ae5596ae76f7cbb (diff)
parenta0b3a15eab6bc2e90008460b646d53e7d9dcdbbb (diff)
Merge tag 'ceph-for-5.17-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "The highlight is the new mount "device" string syntax implemented by Venky Shankar. It solves some long-standing issues with using different auth entities and/or mounting different CephFS filesystems from the same cluster, remounting and also misleading /proc/mounts contents. The existing syntax of course remains to be maintained. On top of that, there is a couple of fixes for edge cases in quota and a new mount option for turning on unbuffered I/O mode globally instead of on a per-file basis with ioctl(CEPH_IOC_SYNCIO)" * tag 'ceph-for-5.17-rc1' of git://github.com/ceph/ceph-client: ceph: move CEPH_SUPER_MAGIC definition to magic.h ceph: remove redundant Lsx caps check ceph: add new "nopagecache" option ceph: don't check for quotas on MDS stray dirs ceph: drop send metrics debug message rbd: make const pointer spaces a static const array ceph: Fix incorrect statfs report for small quota ceph: mount syntax module parameter doc: document new CephFS mount device syntax ceph: record updated mon_addr on remount ceph: new device mount syntax libceph: rename parse_fsid() to ceph_parse_fsid() and export libceph: generalize addr/ip parsing based on delimiter
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r--fs/ceph/super.c169
1 files changed, 159 insertions, 10 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bea89bdb534a..bf79f369aec6 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -27,6 +27,8 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
+#include <uapi/linux/magic.h>
+
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
@@ -146,6 +148,7 @@ enum {
Opt_mds_namespace,
Opt_recover_session,
Opt_source,
+ Opt_mon_addr,
/* string args above */
Opt_dirstat,
Opt_rbytes,
@@ -159,6 +162,7 @@ enum {
Opt_quotadf,
Opt_copyfrom,
Opt_wsync,
+ Opt_pagecache,
};
enum ceph_recover_session_mode {
@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source),
+ fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
+ fsparam_flag_no ("pagecache", Opt_pagecache),
{}
};
@@ -228,9 +234,92 @@ static void canonicalize_path(char *path)
}
/*
- * Parse the source parameter. Distinguish the server list from the path.
+ * Check if the mds namespace in ceph_mount_options matches
+ * the passed in namespace string. First time match (when
+ * ->mds_namespace is NULL) is treated specially, since
+ * ->mds_namespace needs to be initialized by the caller.
+ */
+static int namespace_equals(struct ceph_mount_options *fsopt,
+ const char *namespace, size_t len)
+{
+ return !(fsopt->mds_namespace &&
+ (strlen(fsopt->mds_namespace) != len ||
+ strncmp(fsopt->mds_namespace, namespace, len)));
+}
+
+static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ int r;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ if (*dev_name_end != ':')
+ return invalfc(fc, "separator ':' missing in source");
+
+ r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
+ pctx->copts, fc->log.log, ',');
+ if (r)
+ return r;
+
+ fsopt->new_dev_syntax = false;
+ return 0;
+}
+
+static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ size_t len;
+ struct ceph_fsid fsid;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+ char *fsid_start, *fs_name_start;
+
+ if (*dev_name_end != '=') {
+ dout("separator '=' missing in source");
+ return -EINVAL;
+ }
+
+ fsid_start = strchr(dev_name, '@');
+ if (!fsid_start)
+ return invalfc(fc, "missing cluster fsid");
+ ++fsid_start; /* start of cluster fsid */
+
+ fs_name_start = strchr(fsid_start, '.');
+ if (!fs_name_start)
+ return invalfc(fc, "missing file system name");
+
+ if (ceph_parse_fsid(fsid_start, &fsid))
+ return invalfc(fc, "Invalid FSID");
+
+ ++fs_name_start; /* start of file system name */
+ len = dev_name_end - fs_name_start;
+
+ if (!namespace_equals(fsopt, fs_name_start, len))
+ return invalfc(fc, "Mismatching mds_namespace");
+ kfree(fsopt->mds_namespace);
+ fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
+ if (!fsopt->mds_namespace)
+ return -ENOMEM;
+ dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
+
+ fsopt->new_dev_syntax = true;
+ return 0;
+}
+
+/*
+ * Parse the source parameter for new device format. Distinguish the device
+ * spec from the path. Try parsing new device format and fallback to old
+ * format if needed.
+ *
+ * New device syntax will looks like:
+ * <device_spec>=/<path>
+ * where
+ * <device_spec> is name@fsid.fsname
+ * <path> is optional, but if present must begin with '/'
+ * (monitor addresses are passed via mount option)
*
- * The source will look like:
+ * Old device syntax is:
* <server_spec>[,<server_spec>...]:[<path>]
* where
* <server_spec> is <ip>[:<port>]
@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = dev_name + strlen(dev_name);
}
- dev_name_end--; /* back up to ':' separator */
- if (dev_name_end < dev_name || *dev_name_end != ':')
- return invalfc(fc, "No path or : separator in source");
+ dev_name_end--; /* back up to separator */
+ if (dev_name_end < dev_name)
+ return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
- ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
- pctx->copts, fc->log.log);
- if (ret)
- return ret;
+ dout("trying new device syntax");
+ ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
+ if (ret) {
+ if (ret != -EINVAL)
+ return ret;
+ dout("trying old device syntax");
+ ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
+ if (ret)
+ return ret;
+ }
fc->source = param->string;
param->string = NULL;
return 0;
}
+static int ceph_parse_mon_addr(struct fs_parameter *param,
+ struct fs_context *fc)
+{
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ kfree(fsopt->mon_addr);
+ fsopt->mon_addr = param->string;
+ param->string = NULL;
+
+ return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
+ pctx->copts, fc->log.log, '/');
+}
+
static int ceph_parse_mount_param(struct fs_context *fc,
struct fs_parameter *param)
{
@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
param->string = NULL;
break;
case Opt_mds_namespace:
+ if (!namespace_equals(fsopt, param->string, strlen(param->string)))
+ return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace);
fsopt->mds_namespace = param->string;
param->string = NULL;
@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
if (fc->source)
return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc);
+ case Opt_mon_addr:
+ return ceph_parse_mon_addr(param, fc);
case Opt_wsize:
if (result.uint_32 < PAGE_SIZE ||
result.uint_32 > CEPH_MAX_WRITE_SIZE)
@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break;
+ case Opt_pagecache:
+ if (result.negated)
+ fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
+ else
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
+ break;
default:
BUG();
}
@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->mds_namespace);
kfree(args->server_path);
kfree(args->fscache_uniq);
+ kfree(args->mon_addr);
kfree(args);
}
@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
if (ret)
return ret;
+ ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
+ if (ret)
+ return ret;
+
return ceph_compare_options(new_opt, fsc->client);
}
@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom");
- if (fsopt->mds_namespace)
+ /* dump mds_namespace when old device syntax is in use */
+ if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+ if (fsopt->mon_addr)
+ seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
+
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ seq_puts(m, ",nopagecache");
+
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
static int ceph_get_tree(struct fs_context *fc)
{
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
struct super_block *sb;
struct ceph_fs_client *fsc;
struct dentry *res;
@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source)
return invalfc(fc, "No source");
+ if (fsopt->new_dev_syntax && !fsopt->mon_addr)
+ return invalfc(fc, "No monitor address");
/* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts);
@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
+ if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
+ kfree(fsc->mount_options->mon_addr);
+ fsc->mount_options->mon_addr = fsopt->mon_addr;
+ fsopt->mon_addr = NULL;
+ pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
+ }
+
sync_filesystem(fc->root->d_sb);
return 0;
}
@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false;
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
+/* for both v1 and v2 syntax */
+static bool mount_support = true;
+static const struct kernel_param_ops param_ops_mount_syntax = {
+ .get = param_get_bool,
+};
+module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
+module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
+
module_init(init_ceph);
module_exit(exit_ceph);