From 6b0a877422108372ac25b41ab651e6aa9ed273a6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Nov 2019 22:36:46 +0000 Subject: rbd: fix spelling mistake "requeueing" -> "requeuing" There is a spelling mistake in a debug message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 13527a0b4e44..564520185175 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4230,7 +4230,7 @@ again: * lock owner acked, but resend if we don't see them * release the lock */ - dout("%s rbd_dev %p requeueing lock_dwork\n", __func__, + dout("%s rbd_dev %p requeuing lock_dwork\n", __func__, rbd_dev); mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC)); -- cgit From f3c0e45900a60e1de1a03f74327ea341e713ea45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 7 Nov 2019 16:22:10 +0100 Subject: rbd: introduce rbd_is_snap() Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 564520185175..2f65798c40c0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -514,6 +514,11 @@ static int minor_to_rbd_dev_id(int minor) return minor >> RBD_SINGLE_MAJOR_PART_SHIFT; } +static bool rbd_is_snap(struct rbd_device *rbd_dev) +{ + return rbd_dev->spec->snap_id != CEPH_NOSNAP; +} + static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev) { lockdep_assert_held(&rbd_dev->lock_rwsem); @@ -696,7 +701,7 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) return -EFAULT; /* Snapshots can't be marked read-write */ - if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro) + if (rbd_is_snap(rbd_dev) && !ro) return -EROFS; /* Let blkdev_roset() handle it */ @@ -3555,7 +3560,7 @@ static bool need_exclusive_lock(struct rbd_img_request *img_req) if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) return false; - if (rbd_dev->spec->snap_id != CEPH_NOSNAP) + if (rbd_is_snap(rbd_dev)) return false; rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags)); @@ -4826,7 +4831,7 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } - if (op_type != OBJ_OP_READ && rbd_dev->spec->snap_id != CEPH_NOSNAP) { + if (op_type != OBJ_OP_READ && rbd_is_snap(rbd_dev)) { rbd_warn(rbd_dev, "%s on read-only snapshot", obj_op_name(op_type)); result = -EIO; @@ -4841,7 +4846,7 @@ static void rbd_queue_workfn(struct work_struct *work) */ if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { dout("request for non-existent snapshot"); - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + rbd_assert(rbd_is_snap(rbd_dev)); result = -ENXIO; goto err_rq; } @@ -5084,7 +5089,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } - if (rbd_dev->spec->snap_id == CEPH_NOSNAP) { + if (!rbd_is_snap(rbd_dev)) { rbd_dev->mapping.size = rbd_dev->header.image_size; } else { /* validate mapped snapshot's EXISTS flag */ @@ -6632,7 +6637,7 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) return -EINVAL; } - if (rbd_dev->spec->snap_id != CEPH_NOSNAP) + if (rbd_is_snap(rbd_dev)) return 0; rbd_assert(!rbd_is_lock_owner(rbd_dev)); @@ -7003,7 +7008,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (ret) goto err_out_probe; - if (rbd_dev->spec->snap_id != CEPH_NOSNAP && + if (rbd_is_snap(rbd_dev) && (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) { ret = rbd_object_map_load(rbd_dev); if (ret) @@ -7093,7 +7098,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, } /* If we are mapping a snapshot it must be marked read-only */ - if (rbd_dev->spec->snap_id != CEPH_NOSNAP) + if (rbd_is_snap(rbd_dev)) rbd_dev->opts->read_only = true; if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) { -- cgit From 39258aa2db8175271d8079b7685a6e328a8c1a63 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 7 Nov 2019 17:16:23 +0100 Subject: rbd: introduce RBD_DEV_FLAG_READONLY rbd_dev->opts is not available for parent images, making checking rbd_dev->opts->read_only in various places (rbd_dev_image_probe(), need_exclusive_lock(), use_object_map() in the following patches) harder than it needs to be. Keeping rbd_dev_image_probe() in mind, move the initialization in do_rbd_add() up. snap_id isn't filled in at that point, so replace rbd_is_snap() with a snap_name comparison. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2f65798c40c0..978549d21f4f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -464,6 +464,7 @@ struct rbd_device { enum rbd_dev_flags { RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ + RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */ }; static DEFINE_MUTEX(client_mutex); /* Serialize client creation */ @@ -514,6 +515,11 @@ static int minor_to_rbd_dev_id(int minor) return minor >> RBD_SINGLE_MAJOR_PART_SHIFT; } +static bool rbd_is_ro(struct rbd_device *rbd_dev) +{ + return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); +} + static bool rbd_is_snap(struct rbd_device *rbd_dev) { return rbd_dev->spec->snap_id != CEPH_NOSNAP; @@ -6843,6 +6849,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth) __rbd_get_client(rbd_dev->rbd_client); rbd_spec_get(rbd_dev->parent_spec); + __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags); + ret = rbd_dev_image_probe(parent, depth); if (ret < 0) goto out_err; @@ -6894,7 +6902,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) goto err_out_blkdev; set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); - set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only); + set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev)); ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); if (ret) @@ -7084,6 +7092,11 @@ static ssize_t do_rbd_add(struct bus_type *bus, spec = NULL; /* rbd_dev now owns this */ rbd_opts = NULL; /* rbd_dev now owns this */ + /* if we are mapping a snapshot it will be a read-only mapping */ + if (rbd_dev->opts->read_only || + strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME)) + __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); + rbd_dev->config_info = kstrdup(buf, GFP_KERNEL); if (!rbd_dev->config_info) { rc = -ENOMEM; @@ -7097,10 +7110,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_rbd_dev; } - /* If we are mapping a snapshot it must be marked read-only */ - if (rbd_is_snap(rbd_dev)) - rbd_dev->opts->read_only = true; - if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) { rbd_warn(rbd_dev, "alloc_size adjusted to %u", rbd_dev->layout.object_size); -- cgit From b948ad78971fb2c6ed6b53b0edbdd720cfe08d9f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 8 Nov 2019 15:26:51 +0100 Subject: rbd: treat images mapped read-only seriously Even though -o ro/-o read_only/--read-only options are very old, we have never really treated them seriously (on par with snapshots). As a first step, fail writes to images mapped read-only just like we do for snapshots. We need this check in rbd because the block layer basically ignores read-only setting, see commit a32e236eb93e ("Partially revert "block: fail op_is_write() requests to read-only partitions""). Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 978549d21f4f..02cd2a7df6dd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4837,11 +4837,14 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } - if (op_type != OBJ_OP_READ && rbd_is_snap(rbd_dev)) { - rbd_warn(rbd_dev, "%s on read-only snapshot", - obj_op_name(op_type)); - result = -EIO; - goto err; + if (op_type != OBJ_OP_READ) { + if (rbd_is_ro(rbd_dev)) { + rbd_warn(rbd_dev, "%s on read-only mapping", + obj_op_name(op_type)); + result = -EIO; + goto err; + } + rbd_assert(!rbd_is_snap(rbd_dev)); } /* -- cgit From c1b6205730ef009868fbb68cf4755b20055fcc6c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 12 Nov 2019 19:50:55 +0100 Subject: rbd: disallow read-write partitions on images mapped read-only If an image is mapped read-only, don't allow setting its partition(s) to read-write via BLKROSET: with the previous patch all writes to such images are failed anyway. If an image is mapped read-write, its partition(s) can be set to read-only (and back to read-write) as before. Note that at the rbd level the image will remain writeable: anything sent down by the block layer will be executed, including any write from internal kernel users. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 02cd2a7df6dd..978e4d846f64 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -706,9 +706,16 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) if (get_user(ro, (int __user *)arg)) return -EFAULT; - /* Snapshots can't be marked read-write */ - if (rbd_is_snap(rbd_dev) && !ro) - return -EROFS; + /* + * Both images mapped read-only and snapshots can't be marked + * read-write. + */ + if (!ro) { + if (rbd_is_ro(rbd_dev)) + return -EROFS; + + rbd_assert(!rbd_is_snap(rbd_dev)); + } /* Let blkdev_roset() handle it */ return -ENOTTY; -- cgit From 3fe69921dbb29e7335e5c244d1ef66efd154f84b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 12 Nov 2019 19:41:48 +0100 Subject: rbd: don't acquire exclusive lock for read-only mappings A read-only mapping should be usable with read-only OSD caps, so neither the header lock nor the object map lock can be acquired. Unfortunately, this means that images mapped read-only lose the advantage of the object map. Snapshots, however, can take advantage of the object map without any exclusionary locks, so if the object map is desired, snapshot the image and map the snapshot instead of the image. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 978e4d846f64..c5a56133c260 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1850,6 +1850,17 @@ static u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno) static bool use_object_map(struct rbd_device *rbd_dev) { + /* + * An image mapped read-only can't use the object map -- it isn't + * loaded because the header lock isn't acquired. Someone else can + * write to the image and update the object map behind our back. + * + * A snapshot can't be written to, so using the object map is always + * safe. + */ + if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev)) + return false; + return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) && !(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)); } @@ -3573,7 +3584,7 @@ static bool need_exclusive_lock(struct rbd_img_request *img_req) if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) return false; - if (rbd_is_snap(rbd_dev)) + if (rbd_is_ro(rbd_dev)) return false; rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags)); @@ -6653,7 +6664,7 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) return -EINVAL; } - if (rbd_is_snap(rbd_dev)) + if (rbd_is_ro(rbd_dev)) return 0; rbd_assert(!rbd_is_lock_owner(rbd_dev)); -- cgit From b9ef2b8858a0cf07d5f1b201abaf2480f4aa8201 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 12 Nov 2019 20:20:04 +0100 Subject: rbd: don't establish watch for read-only mappings With exclusive lock out of the way, watch is the only thing left that prevents a read-only mapping from being used with read-only OSD caps. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c5a56133c260..2a22bc24c886 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6961,6 +6961,24 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) return ret; } +static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap) +{ + if (!is_snap) { + pr_info("image %s/%s%s%s does not exist\n", + rbd_dev->spec->pool_name, + rbd_dev->spec->pool_ns ?: "", + rbd_dev->spec->pool_ns ? "/" : "", + rbd_dev->spec->image_name); + } else { + pr_info("snap %s/%s%s%s@%s does not exist\n", + rbd_dev->spec->pool_name, + rbd_dev->spec->pool_ns ?: "", + rbd_dev->spec->pool_ns ? "/" : "", + rbd_dev->spec->image_name, + rbd_dev->spec->snap_name); + } +} + static void rbd_dev_image_release(struct rbd_device *rbd_dev) { rbd_dev_unprobe(rbd_dev); @@ -6979,6 +6997,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) */ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { + bool need_watch = !rbd_is_ro(rbd_dev); int ret; /* @@ -6995,22 +7014,21 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (ret) goto err_out_format; - if (!depth) { + if (need_watch) { ret = rbd_register_watch(rbd_dev); if (ret) { if (ret == -ENOENT) - pr_info("image %s/%s%s%s does not exist\n", - rbd_dev->spec->pool_name, - rbd_dev->spec->pool_ns ?: "", - rbd_dev->spec->pool_ns ? "/" : "", - rbd_dev->spec->image_name); + rbd_print_dne(rbd_dev, false); goto err_out_format; } } ret = rbd_dev_header_info(rbd_dev); - if (ret) + if (ret) { + if (ret == -ENOENT && !need_watch) + rbd_print_dne(rbd_dev, false); goto err_out_watch; + } /* * If this image is the one being mapped, we have pool name and @@ -7024,12 +7042,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) ret = rbd_spec_fill_names(rbd_dev); if (ret) { if (ret == -ENOENT) - pr_info("snap %s/%s%s%s@%s does not exist\n", - rbd_dev->spec->pool_name, - rbd_dev->spec->pool_ns ?: "", - rbd_dev->spec->pool_ns ? "/" : "", - rbd_dev->spec->image_name, - rbd_dev->spec->snap_name); + rbd_print_dne(rbd_dev, true); goto err_out_probe; } @@ -7061,7 +7074,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: - if (!depth) + if (need_watch) rbd_unregister_watch(rbd_dev); err_out_format: rbd_dev->image_format = 0; -- cgit From 686238b7431dcca870ff0bc8ae280cdc89ee41c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 18 Nov 2019 12:51:02 +0100 Subject: rbd: remove snapshot existence validation code RBD_DEV_FLAG_EXISTS check in rbd_queue_workfn() is racy and leads to inconsistent behaviour. If the object (or its snapshot) isn't there, the OSD returns ENOENT. A read submitted before the snapshot removal notification is processed would be zero-filled and ended with status OK, while future reads would be failed with IOERR. It also doesn't handle a case when an image that is mapped read-only is removed. On top of this, because watch is no longer established for read-only mappings, we no longer get notifications, so rbd_exists_validate() is effectively dead code. While failing requests rather than returning zeros is a good thing, RBD_DEV_FLAG_EXISTS is not it. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 42 +++--------------------------------------- 1 file changed, 3 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2a22bc24c886..ee2b15e67e96 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -462,7 +462,7 @@ struct rbd_device { * by rbd_dev->lock */ enum rbd_dev_flags { - RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ + RBD_DEV_FLAG_EXISTS, /* rbd_dev_device_setup() ran */ RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */ }; @@ -4865,19 +4865,6 @@ static void rbd_queue_workfn(struct work_struct *work) rbd_assert(!rbd_is_snap(rbd_dev)); } - /* - * Quit early if the mapped snapshot no longer exists. It's - * still possible the snapshot will have disappeared by the - * time our request arrives at the osd, but there's no sense in - * sending it if we already know. - */ - if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { - dout("request for non-existent snapshot"); - rbd_assert(rbd_is_snap(rbd_dev)); - result = -ENXIO; - goto err_rq; - } - if (offset && length > U64_MAX - offset + 1) { rbd_warn(rbd_dev, "bad request range (%llu~%llu)", offset, length); @@ -5057,25 +5044,6 @@ out: return ret; } -/* - * Clear the rbd device's EXISTS flag if the snapshot it's mapped to - * has disappeared from the (just updated) snapshot context. - */ -static void rbd_exists_validate(struct rbd_device *rbd_dev) -{ - u64 snap_id; - - if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) - return; - - snap_id = rbd_dev->spec->snap_id; - if (snap_id == CEPH_NOSNAP) - return; - - if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX) - clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); -} - static void rbd_dev_update_size(struct rbd_device *rbd_dev) { sector_t size; @@ -5116,12 +5084,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } - if (!rbd_is_snap(rbd_dev)) { - rbd_dev->mapping.size = rbd_dev->header.image_size; - } else { - /* validate mapped snapshot's EXISTS flag */ - rbd_exists_validate(rbd_dev); - } + rbd_assert(!rbd_is_snap(rbd_dev)); + rbd_dev->mapping.size = rbd_dev->header.image_size; out: up_write(&rbd_dev->header_rwsem); -- cgit From fa58bcad90446a8b30bf0d7f3827844bff30ff59 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 5 Nov 2019 13:16:52 +0100 Subject: rbd: don't query snapshot features Since infernalis, ceph.git commit 281f87f9ee52 ("cls_rbd: get_features on snapshots returns HEAD image features"), querying and checking that is pointless. Userspace support for manipulating image features after image creation came also in infernalis, so a snapshot with a different set of features wasn't ever possible. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ee2b15e67e96..3d8342bd6b05 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -377,7 +377,6 @@ struct rbd_client_id { struct rbd_mapping { u64 size; - u64 features; }; /* @@ -644,8 +643,6 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, u8 *order, u64 *snap_size); -static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, - u64 *snap_features); static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev); static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result); @@ -1320,51 +1317,23 @@ static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id, - u64 *snap_features) -{ - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); - if (snap_id == CEPH_NOSNAP) { - *snap_features = rbd_dev->header.features; - } else if (rbd_dev->image_format == 1) { - *snap_features = 0; /* No features for format 1 */ - } else { - u64 features = 0; - int ret; - - ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features); - if (ret) - return ret; - - *snap_features = features; - } - return 0; -} - static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) { u64 snap_id = rbd_dev->spec->snap_id; u64 size = 0; - u64 features = 0; int ret; ret = rbd_snap_size(rbd_dev, snap_id, &size); - if (ret) - return ret; - ret = rbd_snap_features(rbd_dev, snap_id, &features); if (ret) return ret; rbd_dev->mapping.size = size; - rbd_dev->mapping.features = features; - return 0; } static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) { rbd_dev->mapping.size = 0; - rbd_dev->mapping.features = 0; } static void zero_bvec(struct bio_vec *bv) @@ -5207,17 +5176,12 @@ static ssize_t rbd_size_show(struct device *dev, (unsigned long long)rbd_dev->mapping.size); } -/* - * Note this shows the features for whatever's mapped, which is not - * necessarily the base image. - */ static ssize_t rbd_features_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "0x%016llx\n", - (unsigned long long)rbd_dev->mapping.features); + return sprintf(buf, "0x%016llx\n", rbd_dev->header.features); } static ssize_t rbd_major_show(struct device *dev, -- cgit From 196e2d6d0252d37be385c73f64fc8f5787a52066 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 5 Nov 2019 15:38:46 +0100 Subject: rbd: ask for a weaker incompat mask for read-only mappings For a read-only mapping, ask for a set of features that make the image only unwritable rather than both unreadable and unwritable by a client that doesn't understand them. As of today, the difference between them for krbd is journaling (JOURNALING) and live migration (MIGRATING). get_features method supports read_only parameter since hammer, ceph.git commit 6176ec5fde2a ("librbd: differentiate between R/O vs R/W RBD features"). Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3d8342bd6b05..3a40b5f60810 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5669,9 +5669,12 @@ out: } static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, - u64 *snap_features) + bool read_only, u64 *snap_features) { - __le64 snapid = cpu_to_le64(snap_id); + struct { + __le64 snap_id; + u8 read_only; + } features_in; struct { __le64 features; __le64 incompat; @@ -5679,9 +5682,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 unsup; int ret; + features_in.snap_id = cpu_to_le64(snap_id); + features_in.read_only = read_only; + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_features", - &snapid, sizeof(snapid), + &features_in, sizeof(features_in), &features_buf, sizeof(features_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) @@ -5709,7 +5715,8 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, static int rbd_dev_v2_features(struct rbd_device *rbd_dev) { return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.features); + rbd_is_ro(rbd_dev), + &rbd_dev->header.features); } /* -- cgit From 82995cc6c5ae4bf4d72edef381a085e52d5b5905 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Mar 2019 16:38:32 +0000 Subject: libceph, rbd, ceph: convert to use the new mount API Convert the ceph filesystem to the new internal mount API as the old one will be obsoleted and removed. This allows greater flexibility in communication of mount parameters between userspace, the VFS and the filesystem. See Documentation/filesystems/mount_api.txt for more information. [ Numerous string handling, leak and regression fixes; rbd conversion was particularly broken and had to be redone almost from scratch. ] Signed-off-by: David Howells Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 262 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 153 insertions(+), 109 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3a40b5f60810..2b184563cd32 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -838,34 +838,34 @@ enum { Opt_queue_depth, Opt_alloc_size, Opt_lock_timeout, - Opt_last_int, /* int args above */ Opt_pool_ns, - Opt_last_string, /* string args above */ Opt_read_only, Opt_read_write, Opt_lock_on_read, Opt_exclusive, Opt_notrim, - Opt_err }; -static match_table_t rbd_opts_tokens = { - {Opt_queue_depth, "queue_depth=%d"}, - {Opt_alloc_size, "alloc_size=%d"}, - {Opt_lock_timeout, "lock_timeout=%d"}, - /* int args above */ - {Opt_pool_ns, "_pool_ns=%s"}, - /* string args above */ - {Opt_read_only, "read_only"}, - {Opt_read_only, "ro"}, /* Alternate spelling */ - {Opt_read_write, "read_write"}, - {Opt_read_write, "rw"}, /* Alternate spelling */ - {Opt_lock_on_read, "lock_on_read"}, - {Opt_exclusive, "exclusive"}, - {Opt_notrim, "notrim"}, - {Opt_err, NULL} +static const struct fs_parameter_spec rbd_param_specs[] = { + fsparam_u32 ("alloc_size", Opt_alloc_size), + fsparam_flag ("exclusive", Opt_exclusive), + fsparam_flag ("lock_on_read", Opt_lock_on_read), + fsparam_u32 ("lock_timeout", Opt_lock_timeout), + fsparam_flag ("notrim", Opt_notrim), + fsparam_string ("_pool_ns", Opt_pool_ns), + fsparam_u32 ("queue_depth", Opt_queue_depth), + fsparam_flag ("read_only", Opt_read_only), + fsparam_flag ("read_write", Opt_read_write), + fsparam_flag ("ro", Opt_read_only), + fsparam_flag ("rw", Opt_read_write), + {} +}; + +static const struct fs_parameter_description rbd_parameters = { + .name = "rbd", + .specs = rbd_param_specs, }; struct rbd_options { @@ -886,87 +886,12 @@ struct rbd_options { #define RBD_EXCLUSIVE_DEFAULT false #define RBD_TRIM_DEFAULT true -struct parse_rbd_opts_ctx { +struct rbd_parse_opts_ctx { struct rbd_spec *spec; + struct ceph_options *copts; struct rbd_options *opts; }; -static int parse_rbd_opts_token(char *c, void *private) -{ - struct parse_rbd_opts_ctx *pctx = private; - substring_t argstr[MAX_OPT_ARGS]; - int token, intval, ret; - - token = match_token(c, rbd_opts_tokens, argstr); - if (token < Opt_last_int) { - ret = match_int(&argstr[0], &intval); - if (ret < 0) { - pr_err("bad option arg (not int) at '%s'\n", c); - return ret; - } - dout("got int token %d val %d\n", token, intval); - } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, argstr[0].from); - } else { - dout("got token %d\n", token); - } - - switch (token) { - case Opt_queue_depth: - if (intval < 1) { - pr_err("queue_depth out of range\n"); - return -EINVAL; - } - pctx->opts->queue_depth = intval; - break; - case Opt_alloc_size: - if (intval < SECTOR_SIZE) { - pr_err("alloc_size out of range\n"); - return -EINVAL; - } - if (!is_power_of_2(intval)) { - pr_err("alloc_size must be a power of 2\n"); - return -EINVAL; - } - pctx->opts->alloc_size = intval; - break; - case Opt_lock_timeout: - /* 0 is "wait forever" (i.e. infinite timeout) */ - if (intval < 0 || intval > INT_MAX / 1000) { - pr_err("lock_timeout out of range\n"); - return -EINVAL; - } - pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000); - break; - case Opt_pool_ns: - kfree(pctx->spec->pool_ns); - pctx->spec->pool_ns = match_strdup(argstr); - if (!pctx->spec->pool_ns) - return -ENOMEM; - break; - case Opt_read_only: - pctx->opts->read_only = true; - break; - case Opt_read_write: - pctx->opts->read_only = false; - break; - case Opt_lock_on_read: - pctx->opts->lock_on_read = true; - break; - case Opt_exclusive: - pctx->opts->exclusive = true; - break; - case Opt_notrim: - pctx->opts->trim = false; - break; - default: - /* libceph prints "bad option" msg */ - return -EINVAL; - } - - return 0; -} - static char* obj_op_name(enum obj_operation_type op_type) { switch (op_type) { @@ -6423,6 +6348,122 @@ static inline char *dup_token(const char **buf, size_t *lenp) return dup; } +static int rbd_parse_param(struct fs_parameter *param, + struct rbd_parse_opts_ctx *pctx) +{ + struct rbd_options *opt = pctx->opts; + struct fs_parse_result result; + int token, ret; + + ret = ceph_parse_param(param, pctx->copts, NULL); + if (ret != -ENOPARAM) + return ret; + + token = fs_parse(NULL, &rbd_parameters, param, &result); + dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); + if (token < 0) { + if (token == -ENOPARAM) { + return invalf(NULL, "rbd: Unknown parameter '%s'", + param->key); + } + return token; + } + + switch (token) { + case Opt_queue_depth: + if (result.uint_32 < 1) + goto out_of_range; + opt->queue_depth = result.uint_32; + break; + case Opt_alloc_size: + if (result.uint_32 < SECTOR_SIZE) + goto out_of_range; + if (!is_power_of_2(result.uint_32)) { + return invalf(NULL, "rbd: alloc_size must be a power of 2"); + } + opt->alloc_size = result.uint_32; + break; + case Opt_lock_timeout: + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (result.uint_32 > INT_MAX / 1000) + goto out_of_range; + opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000); + break; + case Opt_pool_ns: + kfree(pctx->spec->pool_ns); + pctx->spec->pool_ns = param->string; + param->string = NULL; + break; + case Opt_read_only: + opt->read_only = true; + break; + case Opt_read_write: + opt->read_only = false; + break; + case Opt_lock_on_read: + opt->lock_on_read = true; + break; + case Opt_exclusive: + opt->exclusive = true; + break; + case Opt_notrim: + opt->trim = false; + break; + default: + BUG(); + } + + return 0; + +out_of_range: + return invalf(NULL, "rbd: %s out of range", param->key); +} + +/* + * This duplicates most of generic_parse_monolithic(), untying it from + * fs_context and skipping standard superblock and security options. + */ +static int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx) +{ + char *key; + int ret = 0; + + dout("%s '%s'\n", __func__, options); + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + struct fs_parameter param = { + .key = key, + .type = fs_value_is_string, + }; + char *value = strchr(key, '='); + size_t v_len = 0; + + if (value) { + if (value == key) + continue; + *value++ = 0; + v_len = strlen(value); + } + + + if (v_len > 0) { + param.string = kmemdup_nul(value, v_len, + GFP_KERNEL); + if (!param.string) + return -ENOMEM; + } + param.size = v_len; + + ret = rbd_parse_param(¶m, pctx); + kfree(param.string); + if (ret) + break; + } + } + + return ret; +} + /* * Parse the options provided for an "rbd add" (i.e., rbd image * mapping) request. These arrive via a write to /sys/bus/rbd/add, @@ -6474,8 +6515,7 @@ static int rbd_add_parse_args(const char *buf, const char *mon_addrs; char *snap_name; size_t mon_addrs_size; - struct parse_rbd_opts_ctx pctx = { 0 }; - struct ceph_options *copts; + struct rbd_parse_opts_ctx pctx = { 0 }; int ret; /* The first four tokens are required */ @@ -6486,7 +6526,7 @@ static int rbd_add_parse_args(const char *buf, return -EINVAL; } mon_addrs = buf; - mon_addrs_size = len + 1; + mon_addrs_size = len; buf += len; ret = -EINVAL; @@ -6536,6 +6576,10 @@ static int rbd_add_parse_args(const char *buf, *(snap_name + len) = '\0'; pctx.spec->snap_name = snap_name; + pctx.copts = ceph_alloc_options(); + if (!pctx.copts) + goto out_mem; + /* Initialize all rbd options to the defaults */ pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL); @@ -6550,27 +6594,27 @@ static int rbd_add_parse_args(const char *buf, pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; pctx.opts->trim = RBD_TRIM_DEFAULT; - copts = ceph_parse_options(options, mon_addrs, - mon_addrs + mon_addrs_size - 1, - parse_rbd_opts_token, &pctx); - if (IS_ERR(copts)) { - ret = PTR_ERR(copts); + ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL); + if (ret) goto out_err; - } - kfree(options); - *ceph_opts = copts; + ret = rbd_parse_options(options, &pctx); + if (ret) + goto out_err; + + *ceph_opts = pctx.copts; *opts = pctx.opts; *rbd_spec = pctx.spec; - + kfree(options); return 0; + out_mem: ret = -ENOMEM; out_err: kfree(pctx.opts); + ceph_destroy_options(pctx.copts); rbd_spec_put(pctx.spec); kfree(options); - return ret; } -- cgit