diff options
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
| -rw-r--r-- | drivers/block/drbd/drbd_nl.c | 738 |
1 files changed, 356 insertions, 382 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ad0fcb43e45c..91f3b8afb63c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* drbd_nl.c @@ -7,19 +8,6 @@ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ @@ -37,7 +25,7 @@ #include "drbd_protocol.h" #include "drbd_req.h" #include "drbd_state_change.h" -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/drbd_limits.h> #include <linux/kthread.h> @@ -94,7 +82,7 @@ static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */ DEFINE_MUTEX(notification_mutex); -/* used blkdev_get_by_path, to claim our meta data device(s) */ +/* used bdev_open_by_path, to claim our meta data device(s) */ static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) @@ -114,7 +102,7 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info) if (!info || !info[0]) return 0; - nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY); if (!nla) return err; @@ -127,6 +115,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info) return 0; } +__printf(2, 3) +static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...) +{ + va_list args; + struct nlattr *nla, *txt; + int err = -EMSGSIZE; + int len; + + nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY); + if (!nla) + return err; + + txt = nla_reserve(skb, T_info_text, 256); + if (!txt) { + nla_nest_cancel(skb, nla); + return err; + } + va_start(args, fmt); + len = vscnprintf(nla_data(txt), 256, fmt, args); + va_end(args); + + /* maybe: retry with larger reserve, if truncated */ + txt->nla_len = nla_attr_size(len+1); + nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len)); + nla_nest_end(skb, nla); + + return 0; +} + /* This would be a good candidate for a "pre_doit" hook, * and per-family private info->pointers. * But we need to stay compatible with older kernels. @@ -142,7 +159,7 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info) static int drbd_adm_prepare(struct drbd_config_context *adm_ctx, struct sk_buff *skb, struct genl_info *info, unsigned flags) { - struct drbd_genlmsghdr *d_in = info->userhdr; + struct drbd_genlmsghdr *d_in = genl_info_userhdr(info); const u8 cmd = info->genlhdr->cmd; int err; @@ -251,19 +268,18 @@ static int drbd_adm_prepare(struct drbd_config_context *adm_ctx, /* some more paranoia, if the request was over-determined */ if (adm_ctx->device && adm_ctx->resource && adm_ctx->device->resource != adm_ctx->resource) { - pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n", - adm_ctx->minor, adm_ctx->resource->name, - adm_ctx->device->resource->name); + pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n", + adm_ctx->minor, adm_ctx->resource->name, + adm_ctx->device->resource->name); drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource"); return ERR_INVALID_REQUEST; } if (adm_ctx->device && adm_ctx->volume != VOLUME_UNSPECIFIED && adm_ctx->volume != adm_ctx->device->vnr) { - pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", - adm_ctx->minor, adm_ctx->volume, - adm_ctx->device->vnr, - adm_ctx->device->resource->name); + pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", + adm_ctx->minor, adm_ctx->volume, + adm_ctx->device->vnr, adm_ctx->device->resource->name); drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume"); return ERR_INVALID_REQUEST; } @@ -344,7 +360,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) (char[60]) { }, /* address */ NULL }; char mb[14]; - char *argv[] = {usermode_helper, cmd, mb, NULL }; + char *argv[] = {drbd_usermode_helper, cmd, mb, NULL }; struct drbd_connection *connection = first_peer_device(device)->connection; struct sib_info sib; int ret; @@ -359,19 +375,19 @@ int drbd_khelper(struct drbd_device *device, char *cmd) * write out any unsynced meta data changes now */ drbd_md_sync(device); - drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb); + drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb); sib.sib_reason = SIB_HELPER_PRE; sib.helper_name = cmd; drbd_bcast_event(device, &sib); notify_helper(NOTIFY_CALL, device, connection, cmd, 0); - ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); + ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, mb, + drbd_usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); else drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, mb, + drbd_usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); sib.sib_reason = SIB_HELPER_POST; sib.helper_exit_code = ret; @@ -396,24 +412,24 @@ enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd) (char[60]) { }, /* address */ NULL }; char *resource_name = connection->resource->name; - char *argv[] = {usermode_helper, cmd, resource_name, NULL }; + char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL }; int ret; setup_khelper_env(connection, envp); conn_md_sync(connection); - drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name); + drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name); /* TODO: conn_bcast_event() ?? */ notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0); - ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); + ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, resource_name, + drbd_usermode_helper, cmd, resource_name, (ret >> 8) & 0xff, ret); else drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, resource_name, + drbd_usermode_helper, cmd, resource_name, (ret >> 8) & 0xff, ret); /* TODO: conn_bcast_event() ?? */ notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret); @@ -582,7 +598,7 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection) struct task_struct *opa; kref_get(&connection->kref); - /* We may just have force_sig()'ed this thread + /* We may have just sent a signal to this thread * to get it out of some blocking network function. * Clear signals; otherwise kthread_run(), which internally uses * wait_on_completion_killable(), will mistake our pending signal @@ -668,14 +684,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - int timeo; - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; - rcu_read_unlock(); - schedule_timeout_interruptible(timeo); - if (try < max_tries) + if (try < max_tries) { + int timeo; try = max_tries - 1; + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } continue; } if (rv < SS_SUCCESS) { @@ -753,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) struct set_role_parms parms; int err; enum drbd_ret_code retcode; + enum drbd_state_rv rv; retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -773,12 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) mutex_lock(&adm_ctx.resource->adm_mutex); if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) - retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate); + rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate); else - retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0); + rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0); mutex_unlock(&adm_ctx.resource->adm_mutex); genl_lock(); + drbd_adm_finish(&adm_ctx, info, rv); + return 0; out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -899,7 +919,7 @@ void drbd_resume_io(struct drbd_device *device) wake_up(&device->misc_wait); } -/** +/* * drbd_determine_dev_size() - Sets the right device size obeying all constraints * @device: DRBD device. * @@ -921,7 +941,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct } prev; sector_t u_size, size; struct drbd_md *md = &device->ldev->md; - char ppb[10]; void *buffer; int md_moved, la_size_changed; @@ -980,7 +999,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct goto err_out; } - if (drbd_get_capacity(device->this_bdev) != size || + if (get_capacity(device->vdisk) != size || drbd_bm_capacity(device) != size) { int err; err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC)); @@ -999,8 +1018,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct /* racy, see comments above. */ drbd_set_my_capacity(device, size); md->la_size_sect = size; - drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), - (unsigned long long)size>>1); } if (rv <= DS_ERROR) goto err_out; @@ -1016,7 +1033,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct /* We do some synchronous IO below, which may take some time. * Clear the timer, to avoid scary "timer expired!" messages, * "Superblock" is written out at least twice below, anyways. */ - del_timer(&device->md_sync_timer); + timer_delete(&device->md_sync_timer); /* We won't change the "al-extents" setting, we just may need * to move the on-disk location of the activity log ringbuffer. @@ -1036,7 +1053,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + "size changed", BM_LOCKED_MASK, NULL); /* on-disk bitmap and activity log is authoritative again * (unless there was an IO error meanwhile...) */ @@ -1120,7 +1137,7 @@ drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev, return size; } -/** +/* * drbd_check_al_size() - Ensures that the AL is of the right size * @device: DRBD device. * @@ -1172,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } -static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) +static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) { - q->limits.discard_granularity = granularity; + /* + * We may ignore peer limits if the peer is modern enough. From 8.3.8 + * onwards the peer can use multiple BIOs for a single peer_request. + */ + if (device->state.conn < C_WF_REPORT_PARAMS) + return device->peer_max_bio_size; + + if (first_peer_device(device)->connection->agreed_pro_version < 94) + return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + + /* + * Correct old drbd (up to 8.3.7) if it believes it can do more than + * 32KiB. + */ + if (first_peer_device(device)->connection->agreed_pro_version == 94) + return DRBD_MAX_SIZE_H80_PACKET; + + /* + * drbd 8.3.8 onwards, before 8.4.0 + */ + if (first_peer_device(device)->connection->agreed_pro_version < 100) + return DRBD_MAX_BIO_SIZE_P95; + return DRBD_MAX_BIO_SIZE; } static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) @@ -1187,201 +1226,120 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) return AL_EXTENT_SIZE >> 9; } -static void decide_on_discard_support(struct drbd_device *device, - struct request_queue *q, - struct request_queue *b, - bool discard_zeroes_if_aligned) +static bool drbd_discard_supported(struct drbd_connection *connection, + struct drbd_backing_dev *bdev) { - /* q = drbd device queue (device->rq_queue) - * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue), - * or NULL if diskless - */ - struct drbd_connection *connection = first_peer_device(device)->connection; - bool can_do = b ? blk_queue_discard(b) : true; - - if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { - can_do = false; - drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); - } - if (can_do) { - /* We don't care for the granularity, really. - * Stacking limits below should fix it for the local - * device. Whether or not it is a suitable granularity - * on the remote device is not our problem, really. If - * you care, you need to use devices with similar - * topology on all peers. */ - blk_queue_discard_granularity(q, 512); - q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); - } else { - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); - blk_queue_discard_granularity(q, 0); - q->limits.max_discard_sectors = 0; - q->limits.max_write_zeroes_sectors = 0; - } -} + if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) + return false; -static void fixup_discard_if_not_supported(struct request_queue *q) -{ - /* To avoid confusion, if this queue does not support discard, clear - * max_discard_sectors, which is what lsblk -D reports to the user. - * Older kernels got this wrong in "stack limits". - * */ - if (!blk_queue_discard(q)) { - blk_queue_max_discard_sectors(q, 0); - blk_queue_discard_granularity(q, 0); + if (connection->cstate >= C_CONNECTED && + !(connection->agreed_features & DRBD_FF_TRIM)) { + drbd_info(connection, + "peer DRBD too old, does not support TRIM: disabling discards\n"); + return false; } + + return true; } -static void decide_on_write_same_support(struct drbd_device *device, - struct request_queue *q, - struct request_queue *b, struct o_qlim *o) +/* This is the workaround for "bio would need to, but cannot, be split" */ +static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) { - struct drbd_peer_device *peer_device = first_peer_device(device); - struct drbd_connection *connection = peer_device->connection; - bool can_do = b ? b->limits.max_write_same_sectors : true; - - if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) { - can_do = false; - drbd_info(peer_device, "peer does not support WRITE_SAME\n"); - } - - if (o) { - /* logical block size; queue_logical_block_size(NULL) is 512 */ - unsigned int peer_lbs = be32_to_cpu(o->logical_block_size); - unsigned int me_lbs_b = queue_logical_block_size(b); - unsigned int me_lbs = queue_logical_block_size(q); - - if (me_lbs_b != me_lbs) { - drbd_warn(device, - "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n", - me_lbs, me_lbs_b); - /* rather disable write same than trigger some BUG_ON later in the scsi layer. */ - can_do = false; - } - if (me_lbs_b != peer_lbs) { - drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n", - me_lbs, peer_lbs); - if (can_do) { - drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n"); - can_do = false; - } - me_lbs = max(me_lbs, me_lbs_b); - /* We cannot change the logical block size of an in-use queue. - * We can only hope that access happens to be properly aligned. - * If not, the peer will likely produce an IO error, and detach. */ - if (peer_lbs > me_lbs) { - if (device->state.role != R_PRIMARY) { - blk_queue_logical_block_size(q, peer_lbs); - drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs); - } else { - drbd_warn(peer_device, - "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n", - me_lbs, peer_lbs); - } - } - } - if (can_do && !o->write_same_capable) { - /* If we introduce an open-coded write-same loop on the receiving side, - * the peer would present itself as "capable". */ - drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n"); - can_do = false; - } - } + unsigned int max_segments; - blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0); + rcu_read_lock(); + max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); + + if (!max_segments) + return BLK_MAX_SEGMENTS; + return max_segments; } -static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size, struct o_qlim *o) +void drbd_reconsider_queue_parameters(struct drbd_device *device, + struct drbd_backing_dev *bdev, struct o_qlim *o) { + struct drbd_connection *connection = + first_peer_device(device)->connection; struct request_queue * const q = device->rq_queue; - unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = 0; + unsigned int now = queue_max_hw_sectors(q) << 9; + struct queue_limits lim; struct request_queue *b = NULL; - struct disk_conf *dc; - bool discard_zeroes_if_aligned = true; + unsigned int new; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; - max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - max_segments = dc->max_bio_bvecs; - discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned; - rcu_read_unlock(); - - blk_set_stacking_limits(&q->limits); + device->local_max_bio_size = + queue_max_hw_sectors(b) << SECTOR_SHIFT; } - blk_queue_max_hw_sectors(q, max_hw_sectors); - /* This is the workaround for "bio would need to, but cannot, be split" */ - blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - decide_on_discard_support(device, q, b, discard_zeroes_if_aligned); - decide_on_write_same_support(device, q, b, o); - - if (b) { - blk_queue_stack_limits(q, b); - - if (q->backing_dev_info->ra_pages != - b->backing_dev_info->ra_pages) { - drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info->ra_pages, - b->backing_dev_info->ra_pages); - q->backing_dev_info->ra_pages = - b->backing_dev_info->ra_pages; - } + /* + * We may later detach and re-attach on a disconnected Primary. Avoid + * decreasing the value in this case. + * + * We want to store what we know the peer DRBD can handle, not what the + * peer IO backend can handle. + */ + new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size, + max(drbd_max_peer_bio_size(device), device->peer_max_bio_size)); + if (new != now) { + if (device->state.role == R_PRIMARY && new < now) + drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", + new, now); + drbd_info(device, "max BIO size = %u\n", new); } - fixup_discard_if_not_supported(q); -} - -void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) -{ - unsigned int now, new, local, peer; - - now = queue_max_hw_sectors(device->rq_queue) << 9; - local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ - peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ + lim = queue_limits_start_update(q); if (bdev) { - local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; - device->local_max_bio_size = local; + blk_set_stacking_limits(&lim); + lim.max_segments = drbd_backing_dev_max_segments(device); + } else { + lim.max_segments = BLK_MAX_SEGMENTS; } - local = min(local, DRBD_MAX_BIO_SIZE); - /* We may ignore peer limits if the peer is modern enough. - Because new from 8.3.8 onwards the peer can use multiple - BIOs for a single peer_request */ - if (device->state.conn >= C_WF_REPORT_PARAMS) { - if (first_peer_device(device)->connection->agreed_pro_version < 94) - peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); - /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ - else if (first_peer_device(device)->connection->agreed_pro_version == 94) - peer = DRBD_MAX_SIZE_H80_PACKET; - else if (first_peer_device(device)->connection->agreed_pro_version < 100) - peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ - else - peer = DRBD_MAX_BIO_SIZE; + lim.max_hw_sectors = new >> SECTOR_SHIFT; + lim.seg_boundary_mask = PAGE_SIZE - 1; - /* We may later detach and re-attach on a disconnected Primary. - * Avoid this setting to jump back in that case. - * We want to store what we know the peer DRBD can handle, - * not what the peer IO backend can handle. */ - if (peer > device->peer_max_bio_size) - device->peer_max_bio_size = peer; + /* + * We don't care for the granularity, really. + * + * Stacking limits below should fix it for the local device. Whether or + * not it is a suitable granularity on the remote device is not our + * problem, really. If you care, you need to use devices with similar + * topology on all peers. + */ + if (drbd_discard_supported(connection, bdev)) { + lim.discard_granularity = 512; + lim.max_hw_discard_sectors = + drbd_max_discard_sectors(connection); + } else { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; } - new = min(local, peer); - if (device->state.role == R_PRIMARY && new < now) - drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now); + if (bdev) + blk_stack_limits(&lim, &b->limits, 0); - if (new != now) - drbd_info(device, "max BIO size = %u\n", new); + /* + * If we can handle "zeroes" efficiently on the protocol, we want to do + * that, even if our backend does not announce max_write_zeroes_sectors + * itself. + */ + if (connection->agreed_features & DRBD_FF_WZEROES) + lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; + else + lim.max_write_zeroes_sectors = 0; + lim.max_hw_wzeroes_unmap_sectors = 0; + + if ((lim.discard_granularity >> SECTOR_SHIFT) > + lim.max_hw_discard_sectors) { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; + } - drbd_setup_queue_param(device, bdev, new, o); + if (queue_limits_commit_update(q, &lim)) + drbd_err(device, "setting new queue limits failed\n"); } /* Starts the worker thread */ @@ -1431,8 +1389,9 @@ static void drbd_suspend_al(struct drbd_device *device) static bool should_set_defaults(struct genl_info *info) { - unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags; - return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); + struct drbd_genlmsghdr *dh = genl_info_userhdr(info); + + return 0 != (dh->flags & DRBD_GENL_F_SET_DEFAULTS); } static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) @@ -1473,14 +1432,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf, struct drbd_backing_dev *nbc) { - struct request_queue * const q = nbc->backing_bdev->bd_disk->queue; + struct block_device *bdev = nbc->backing_bdev; if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; if (disk_conf->al_extents > drbd_al_extents_max(nbc)) disk_conf->al_extents = drbd_al_extents_max(nbc); - if (!blk_queue_discard(q)) { + if (!bdev_max_discard_sectors(bdev)) { if (disk_conf->rs_discard_granularity) { disk_conf->rs_discard_granularity = 0; /* disable feature */ drbd_info(device, "rs_discard_granularity feature disabled\n"); @@ -1489,16 +1448,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis if (disk_conf->rs_discard_granularity) { int orig_value = disk_conf->rs_discard_granularity; + sector_t discard_size = bdev_max_discard_sectors(bdev) << 9; + unsigned int discard_granularity = bdev_discard_granularity(bdev); int remainder; - if (q->limits.discard_granularity > disk_conf->rs_discard_granularity) - disk_conf->rs_discard_granularity = q->limits.discard_granularity; + if (discard_granularity > disk_conf->rs_discard_granularity) + disk_conf->rs_discard_granularity = discard_granularity; - remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity; + remainder = disk_conf->rs_discard_granularity % + discard_granularity; disk_conf->rs_discard_granularity += remainder; - if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9) - disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9; + if (disk_conf->rs_discard_granularity > discard_size) + disk_conf->rs_discard_granularity = discard_size; if (disk_conf->rs_discard_granularity != orig_value) drbd_info(device, "rs_discard_granularity changed to %d\n", @@ -1506,6 +1468,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis } } +static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc) +{ + int err = -EBUSY; + + if (device->act_log && + device->act_log->nr_elements == dc->al_extents) + return 0; + + drbd_suspend_io(device); + /* If IO completion is currently blocked, we would likely wait + * "forever" for the activity log to become unused. So we don't. */ + if (atomic_read(&device->ap_bio_cnt)) + goto out; + + wait_event(device->al_wait, lc_try_lock(device->act_log)); + drbd_al_shrink(device); + err = drbd_check_al_size(device, dc); + lc_unlock(device->act_log); + wake_up(&device->al_wait); +out: + drbd_resume_io(device); + return err; +} + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -1513,7 +1499,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) struct drbd_device *device; struct disk_conf *new_disk_conf, *old_disk_conf; struct fifo_buffer *old_plan = NULL, *new_plan = NULL; - int err, fifo_size; + int err; + unsigned int fifo_size; retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1550,7 +1537,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) goto fail_unlock; } - if (!expect(new_disk_conf->resync_rate >= 1)) + if (!expect(device, new_disk_conf->resync_rate >= 1)) new_disk_conf->resync_rate = 1; sanitize_disk_conf(device, new_disk_conf, device->ldev); @@ -1568,15 +1555,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } - drbd_suspend_io(device); - wait_event(device->al_wait, lc_try_lock(device->act_log)); - drbd_al_shrink(device); - err = drbd_check_al_size(device, new_disk_conf); - lc_unlock(device->act_log); - wake_up(&device->al_wait); - drbd_resume_io(device); - + err = disk_opts_check_al_size(device, new_disk_conf); if (err) { + /* Could be just "busy". Ignore? + * Introduce dedicated error code? */ + drbd_msg_put_info(adm_ctx.reply_skb, + "Try again without changing current al-extents setting"); retcode = ERR_NOMEM; goto fail_unlock; } @@ -1612,7 +1596,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (write_ordering_changed(old_disk_conf, new_disk_conf)) drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); - if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned) + if (old_disk_conf->discard_zeroes_if_aligned != + new_disk_conf->discard_zeroes_if_aligned) drbd_reconsider_queue_parameters(device, device->ldev, NULL); drbd_md_sync(device); @@ -1624,8 +1609,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) drbd_send_sync_param(peer_device); } - synchronize_rcu(); - kfree(old_disk_conf); + kvfree_rcu_mightsleep(old_disk_conf); kfree(old_plan); mod_timer(&device->request_timer, jiffies + HZ); goto success; @@ -1644,43 +1628,45 @@ success: return 0; } -static struct block_device *open_backing_dev(struct drbd_device *device, +static struct file *open_backing_dev(struct drbd_device *device, const char *bdev_path, void *claim_ptr, bool do_bd_link) { - struct block_device *bdev; + struct file *file; int err = 0; - bdev = blkdev_get_by_path(bdev_path, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr); - if (IS_ERR(bdev)) { + file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE, + claim_ptr, NULL); + if (IS_ERR(file)) { drbd_err(device, "open(\"%s\") failed with %ld\n", - bdev_path, PTR_ERR(bdev)); - return bdev; + bdev_path, PTR_ERR(file)); + return file; } if (!do_bd_link) - return bdev; + return file; - err = bd_link_disk_holder(bdev, device->vdisk); + err = bd_link_disk_holder(file_bdev(file), device->vdisk); if (err) { - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + fput(file); drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n", bdev_path, err); - bdev = ERR_PTR(err); + file = ERR_PTR(err); } - return bdev; + return file; } static int open_backing_devices(struct drbd_device *device, struct disk_conf *new_disk_conf, struct drbd_backing_dev *nbc) { - struct block_device *bdev; + struct file *file; - bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true); - if (IS_ERR(bdev)) + file = open_backing_dev(device, new_disk_conf->backing_dev, device, + true); + if (IS_ERR(file)) return ERR_OPEN_DISK; - nbc->backing_bdev = bdev; + nbc->backing_bdev = file_bdev(file); + nbc->backing_bdev_file = file; /* * meta_dev_idx >= 0: external fixed size, possibly multiple @@ -1690,7 +1676,7 @@ static int open_backing_devices(struct drbd_device *device, * should check it for you already; but if you don't, or * someone fooled it, we need to double check here) */ - bdev = open_backing_dev(device, new_disk_conf->meta_dev, + file = open_backing_dev(device, new_disk_conf->meta_dev, /* claim ptr: device, if claimed exclusively; shared drbd_m_holder, * if potentially shared with other drbd minors */ (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder, @@ -1698,20 +1684,21 @@ static int open_backing_devices(struct drbd_device *device, * as would happen with internal metadata. */ (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT && new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL)); - if (IS_ERR(bdev)) + if (IS_ERR(file)) return ERR_OPEN_MD_DISK; - nbc->md_bdev = bdev; + nbc->md_bdev = file_bdev(file); + nbc->f_md_bdev = file; return NO_ERROR; } -static void close_backing_dev(struct drbd_device *device, struct block_device *bdev, - bool do_bd_unlink) +static void close_backing_dev(struct drbd_device *device, + struct file *bdev_file, bool do_bd_unlink) { - if (!bdev) + if (!bdev_file) return; if (do_bd_unlink) - bd_unlink_disk_holder(bdev, device->vdisk); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk); + fput(bdev_file); } void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) @@ -1719,8 +1706,9 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev * if (ldev == NULL) return; - close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev); - close_backing_dev(device, ldev->backing_bdev, true); + close_backing_dev(device, ldev->f_md_bdev, + ldev->md_bdev != ldev->backing_bdev); + close_backing_dev(device, ldev->backing_bdev_file, true); kfree(ldev->disk_conf); kfree(ldev); @@ -1880,8 +1868,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Make sure the new disk is big enough * (we may currently be R_PRIMARY with no local disk...) */ - if (drbd_get_max_capacity(nbc) < - drbd_get_capacity(device->this_bdev)) { + if (drbd_get_max_capacity(nbc) < get_capacity(device->vdisk)) { retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1910,7 +1897,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_flush_workqueue(&connection->sender_work); rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); - retcode = rv; /* FIXME: Type mismatch. */ + retcode = (enum drbd_ret_code)rv; drbd_resume_io(device); if (rv < SS_SUCCESS) goto fail; @@ -1925,9 +1912,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (device->state.conn < C_CONNECTED && - device->state.role == R_PRIMARY && device->ed_uuid && - (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { + if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid && + (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) && + (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { drbd_err(device, "Can only attach to data with current UUID=%016llX\n", (unsigned long long)device->ed_uuid); retcode = ERR_DATA_NOT_CURRENT; @@ -1941,11 +1928,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Prevent shrinking of consistent devices ! */ - if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { - drbd_warn(device, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TOO_SMALL; - goto force_diskless_dec; + { + unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0); + unsigned long long eff = nbc->md.la_size_sect; + if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) { + if (nsz == nbc->disk_conf->disk_size) { + drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff); + } else { + drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff); + drbd_msg_sprintf_info(adm_ctx.reply_skb, + "To-be-attached device has last effective > current size, and is consistent\n" + "(%llu > %llu sectors). Refusing to attach.", eff, nsz); + retcode = ERR_IMPLICIT_SHRINK; + goto force_diskless_dec; + } + } } lock_all_resources(); @@ -2028,13 +2025,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_info(device, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from attaching", BM_LOCKED_MASK)) { + "set_n_write from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { if (drbd_bitmap_io(device, &drbd_bm_read, - "read from attaching", BM_LOCKED_MASK)) { + "read from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -2125,8 +2124,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) fail: conn_reconfig_done(connection); if (nbc) { - close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev); - close_backing_dev(device, nbc->backing_bdev, true); + close_backing_dev(device, nbc->f_md_bdev, + nbc->md_bdev != nbc->backing_bdev); + close_backing_dev(device, nbc->backing_bdev_file, true); kfree(nbc); } kfree(new_disk_conf); @@ -2140,34 +2140,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) static int adm_detach(struct drbd_device *device, int force) { - enum drbd_state_rv retcode; - void *buffer; - int ret; - if (force) { set_bit(FORCE_DETACH, &device->flags); drbd_force_state(device, NS(disk, D_FAILED)); - retcode = SS_SUCCESS; - goto out; + return SS_SUCCESS; } - drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ - buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ - if (buffer) { - retcode = drbd_request_state(device, NS(disk, D_FAILED)); - drbd_md_put_buffer(device); - } else /* already <= D_FAILED */ - retcode = SS_NOTHING_TO_DO; - /* D_FAILED will transition to DISKLESS. */ - drbd_resume_io(device); - ret = wait_event_interruptible(device->misc_wait, - device->state.disk != D_FAILED); - if ((int)retcode == (int)SS_IS_DISKLESS) - retcode = SS_NOTHING_TO_DO; - if (ret) - retcode = ERR_INTR; -out: - return retcode; + return drbd_request_detach_interruptible(device); } /* Detaching the disk is a process in multiple stages. First we need to lock @@ -2315,10 +2294,10 @@ check_net_options(struct drbd_connection *connection, struct net_conf *new_net_c } struct crypto { - struct crypto_ahash *verify_tfm; - struct crypto_ahash *csums_tfm; + struct crypto_shash *verify_tfm; + struct crypto_shash *csums_tfm; struct crypto_shash *cram_hmac_tfm; - struct crypto_ahash *integrity_tfm; + struct crypto_shash *integrity_tfm; }; static int @@ -2336,36 +2315,21 @@ alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg) return NO_ERROR; } -static int -alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg) -{ - if (!tfm_name[0]) - return NO_ERROR; - - *tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(*tfm)) { - *tfm = NULL; - return err_alg; - } - - return NO_ERROR; -} - static enum drbd_ret_code alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) { char hmac_name[CRYPTO_MAX_ALG_NAME]; enum drbd_ret_code rv; - rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg, + rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg, ERR_CSUMS_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg, + rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg, ERR_VERIFY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg, + rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg, ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; @@ -2383,9 +2347,9 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) static void free_crypto(struct crypto *crypto) { crypto_free_shash(crypto->cram_hmac_tfm); - crypto_free_ahash(crypto->integrity_tfm); - crypto_free_ahash(crypto->csums_tfm); - crypto_free_ahash(crypto->verify_tfm); + crypto_free_shash(crypto->integrity_tfm); + crypto_free_shash(crypto->csums_tfm); + crypto_free_shash(crypto->verify_tfm); } int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) @@ -2462,17 +2426,17 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(connection->net_conf, new_net_conf); if (!rsr) { - crypto_free_ahash(connection->csums_tfm); + crypto_free_shash(connection->csums_tfm); connection->csums_tfm = crypto.csums_tfm; crypto.csums_tfm = NULL; } if (!ovr) { - crypto_free_ahash(connection->verify_tfm); + crypto_free_shash(connection->verify_tfm); connection->verify_tfm = crypto.verify_tfm; crypto.verify_tfm = NULL; } - crypto_free_ahash(connection->integrity_tfm); + crypto_free_shash(connection->integrity_tfm); connection->integrity_tfm = crypto.integrity_tfm; if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100) /* Do this without trying to take connection->data.mutex again. */ @@ -2483,8 +2447,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&connection->resource->conf_update); mutex_unlock(&connection->data.mutex); - synchronize_rcu(); - kfree(old_net_conf); + kvfree_rcu_mightsleep(old_net_conf); if (connection->cstate >= C_WF_REPORT_PARAMS) { struct drbd_peer_device *peer_device; @@ -2542,6 +2505,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) struct drbd_resource *resource; struct drbd_connection *connection; enum drbd_ret_code retcode; + enum drbd_state_rv rv; int i; int err; @@ -2661,11 +2625,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } rcu_read_unlock(); - retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); + rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); conn_reconfig_done(connection); mutex_unlock(&adm_ctx.resource->adm_mutex); - drbd_adm_finish(&adm_ctx, info, retcode); + drbd_adm_finish(&adm_ctx, info, rv); return 0; fail: @@ -2681,8 +2645,10 @@ out: static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) { + enum drbd_conns cstate; enum drbd_state_rv rv; +repeat: rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), force ? CS_HARD : 0); @@ -2700,6 +2666,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection break; case SS_CW_FAILED_BY_PEER: + spin_lock_irq(&connection->resource->req_lock); + cstate = connection->cstate; + spin_unlock_irq(&connection->resource->req_lock); + if (cstate <= C_WF_CONNECTION) + goto repeat; /* The peer probably wants to see us outdated. */ rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, disk, D_OUTDATED), 0); @@ -2766,11 +2737,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) mutex_lock(&adm_ctx.resource->adm_mutex); rv = conn_try_disconnect(connection, parms.force_disconnect); - if (rv < SS_SUCCESS) - retcode = rv; /* FIXME: Type mismatch. */ - else - retcode = NO_ERROR; mutex_unlock(&adm_ctx.resource->adm_mutex); + if (rv < SS_SUCCESS) { + drbd_adm_finish(&adm_ctx, info, rv); + return 0; + } + retcode = NO_ERROR; fail: drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -2889,8 +2861,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) new_disk_conf->disk_size = (sector_t)rs.resize_size; rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); mutex_unlock(&device->resource->conf_update); - synchronize_rcu(); - kfree(old_disk_conf); + kvfree_rcu_mightsleep(old_disk_conf); new_disk_conf = NULL; } @@ -3002,7 +2973,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from invalidate", BM_LOCKED_MASK)) + "set_n_write from invalidate", BM_LOCKED_MASK, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -3035,11 +3006,12 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) +static int drbd_bmio_set_susp_al(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { int rv; - rv = drbd_bmio_set_n_write(device); + rv = drbd_bmio_set_n_write(device, peer_device); drbd_suspend_al(device); return rv; } @@ -3082,7 +3054,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -3215,7 +3187,7 @@ static int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_device *device) { struct nlattr *nla; - nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_CONTEXT); if (!nla) goto nla_put_failure; if (device && @@ -3328,7 +3300,6 @@ static void device_to_statistics(struct device_statistics *s, if (get_ldev(device)) { struct drbd_md *md = &device->ldev->md; u64 *history_uuids = (u64 *)s->history_uuids; - struct request_queue *q; int n; spin_lock_irq(&md->uuid_lock); @@ -3342,14 +3313,9 @@ static void device_to_statistics(struct device_statistics *s, spin_unlock_irq(&md->uuid_lock); s->dev_disk_flags = md->flags; - q = bdev_get_queue(device->ldev->backing_bdev); - s->dev_lower_blocked = - bdi_congested(q->backing_dev_info, - (1 << WB_async_congested) | - (1 << WB_sync_congested)); put_ldev(device); } - s->dev_size = drbd_get_capacity(device->this_bdev); + s->dev_size = get_capacity(device->vdisk); s->dev_read = device->read_cnt; s->dev_write = device->writ_cnt; s->dev_al_writes = device->al_writ_cnt; @@ -3381,7 +3347,7 @@ int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *resource_filter; struct drbd_resource *resource; - struct drbd_device *uninitialized_var(device); + struct drbd_device *device; int minor, err, retcode; struct drbd_genlmsghdr *dh; struct device_info device_info; @@ -3470,7 +3436,7 @@ int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *resource_filter; struct drbd_resource *resource = NULL, *next_resource; - struct drbd_connection *uninitialized_var(connection); + struct drbd_connection *connection; int err = 0, retcode; struct drbd_genlmsghdr *dh; struct connection_info connection_info; @@ -3632,7 +3598,7 @@ int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *resource_filter; struct drbd_resource *resource; - struct drbd_device *uninitialized_var(device); + struct drbd_device *device; struct drbd_peer_device *peer_device = NULL; int minor, err, retcode; struct drbd_genlmsghdr *dh; @@ -3783,14 +3749,13 @@ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, if (err) goto nla_put_failure; - nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); + nla = nla_nest_start_noflag(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || nla_put_u32(skb, T_current_state, device->state.i) || nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) || - nla_put_u64_0pad(skb, T_capacity, - drbd_get_capacity(device->this_bdev)) || + nla_put_u64_0pad(skb, T_capacity, get_capacity(device->vdisk)) || nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) || nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) || nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) || @@ -3841,7 +3806,7 @@ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, if (nla_put_u32(skb, T_helper_exit_code, sib->helper_exit_code)) goto nla_put_failure; - /* fall through */ + fallthrough; case SIB_HELPER_PRE: if (nla_put_string(skb, T_helper, sib->helper_name)) goto nla_put_failure; @@ -4185,7 +4150,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) if (args.clear_bm) { err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, - "clear_n_write from new_c_uuid", BM_LOCKED_MASK); + "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL); if (err) { drbd_err(device, "Writing bitmap failed with %d\n", err); retcode = ERR_IO_MD_DISK; @@ -4304,7 +4269,7 @@ static void device_to_info(struct device_info *info, int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; - struct drbd_genlmsghdr *dh = info->userhdr; + struct drbd_genlmsghdr *dh = genl_info_userhdr(info); enum drbd_ret_code retcode; retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); @@ -4588,7 +4553,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4630,16 +4595,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4679,16 +4645,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4728,16 +4695,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4778,13 +4746,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4798,7 +4767,7 @@ void notify_helper(enum drbd_notification_type type, struct drbd_genlmsghdr *dh; int err; - strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name)); + strscpy(helper_info.helper_name, name, sizeof(helper_info.helper_name)); helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name)); helper_info.helper_status = status; @@ -4835,7 +4804,7 @@ fail: err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4849,11 +4818,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4880,6 +4850,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4888,32 +4859,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4928,7 +4899,10 @@ next: cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) |
