From 16d80c54ad42c573a897ae7bcf5a9816be54e6fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 Mar 2019 14:50:04 +0100 Subject: rbd: set io_min, io_opt and discard_granularity to alloc_size Now that we have alloc_size that controls our discard behavior, it doesn't make sense to have these set to object (set) size. alloc_size defaults to 64k, but because discard_granularity is likely 4M, only ranges that are equal to or bigger than 4M can be considered during fstrim. A smaller io_min is also more likely to be met, resulting in fewer deferred writes on bluestore OSDs. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4ba967d65cf9..3b2c9289dccb 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -833,7 +833,7 @@ static int parse_rbd_opts_token(char *c, void *private) pctx->opts->queue_depth = intval; break; case Opt_alloc_size: - if (intval < 1) { + if (intval < SECTOR_SIZE) { pr_err("alloc_size out of range\n"); return -EINVAL; } @@ -4203,12 +4203,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.max_sectors = queue_max_hw_sectors(q); blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, objset_bytes); - blk_queue_io_opt(q, objset_bytes); + blk_queue_io_min(q, rbd_dev->opts->alloc_size); + blk_queue_io_opt(q, rbd_dev->opts->alloc_size); if (rbd_dev->opts->trim) { blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = objset_bytes; + q->limits.discard_granularity = rbd_dev->opts->alloc_size; blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); } -- cgit From bb229bbb3bf63d23128e851a1f3b85c083178fa1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Mar 2019 09:46:58 +0100 Subject: libceph: wait for latest osdmap in ceph_monc_blacklist_add() Because map updates are distributed lazily, an OSD may not know about the new blacklist for quite some time after "osd blacklist add" command is completed. This makes it possible for a blacklisted but still alive client to overwrite a post-blacklist update, resulting in data corruption. Waiting for latest osdmap in ceph_monc_blacklist_add() and thus using the post-blacklist epoch for all post-blacklist requests ensures that all such requests "wait" for the blacklist to come into force on their respective OSDs. Cc: stable@vger.kernel.org Fixes: 6305a3b41515 ("libceph: support for blacklisting clients") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- include/linux/ceph/libceph.h | 2 ++ net/ceph/ceph_common.c | 18 +++++++++++++++++- net/ceph/mon_client.c | 9 +++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index a420c07904bc..337d5049ff93 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -294,6 +294,8 @@ extern void ceph_destroy_client(struct ceph_client *client); extern int __ceph_open_session(struct ceph_client *client, unsigned long started); extern int ceph_open_session(struct ceph_client *client); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 9cab80207ced..79eac465ec65 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -738,7 +738,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) } EXPORT_SYMBOL(__ceph_open_session); - int ceph_open_session(struct ceph_client *client) { int ret; @@ -754,6 +753,23 @@ int ceph_open_session(struct ceph_client *client) } EXPORT_SYMBOL(ceph_open_session); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout) +{ + u64 newest_epoch; + int ret; + + ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); + if (ret) + return ret; + + if (client->osdc.osdmap->epoch >= newest_epoch) + return 0; + + ceph_osdc_maybe_request_map(&client->osdc); + return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout); +} +EXPORT_SYMBOL(ceph_wait_for_latest_osdmap); static int __init init_ceph_lib(void) { diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 18deb3d889c4..a53e4fbb6319 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, mutex_unlock(&monc->mutex); ret = wait_generic_request(req); + if (!ret) + /* + * Make sure we have the osdmap that includes the blacklist + * entry. This is needed to ensure that the OSDs pick up the + * new blacklist before processing any future requests from + * this client. + */ + ret = ceph_wait_for_latest_osdmap(monc->client, 0); + out: put_generic_request(req); return ret; -- cgit From 9d4a227f6ef189cf37eb22641f6ee788b7dc41bb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Mar 2019 10:58:05 +0100 Subject: rbd: drop wait_for_latest_osdmap() Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3b2c9289dccb..2210c1b9491b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -924,23 +924,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -static int wait_for_latest_osdmap(struct ceph_client *client) -{ - u64 newest_epoch; - int ret; - - ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); - if (ret) - return ret; - - if (client->osdc.osdmap->epoch >= newest_epoch) - return 0; - - ceph_osdc_maybe_request_map(&client->osdc); - return ceph_monc_wait_osdmap(&client->monc, newest_epoch, - client->options->mount_timeout); -} - /* * Get a ceph client with specific addr and configuration, if one does * not exist create it. Either way, ceph_opts is consumed by this @@ -960,7 +943,8 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) * Using an existing client. Make sure ->pg_pools is up to * date before we look up the pool id in do_rbd_add(). */ - ret = wait_for_latest_osdmap(rbdc->client); + ret = ceph_wait_for_latest_osdmap(rbdc->client, + rbdc->client->options->mount_timeout); if (ret) { rbd_warn(NULL, "failed to get latest osdmap: %d", ret); rbd_put_client(rbdc); -- cgit