summaryrefslogtreecommitdiff
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c689
1 files changed, 439 insertions, 250 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5a1f98494ddd..f6c33b21f69e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -11,6 +11,8 @@
* (part of code stolen from loop.c)
*/
+#define pr_fmt(fmt) "nbd: " fmt
+
#include <linux/major.h>
#include <linux/blkdev.h>
@@ -60,11 +62,13 @@ struct nbd_sock {
bool dead;
int fallback_index;
int cookie;
+ struct work_struct work;
};
struct recv_thread_args {
struct work_struct work;
struct nbd_device *nbd;
+ struct nbd_sock *nsock;
int index;
};
@@ -138,6 +142,9 @@ struct nbd_device {
*/
#define NBD_CMD_INFLIGHT 2
+/* Just part of request header or data payload is sent successfully */
+#define NBD_CMD_PARTIAL_SEND 3
+
struct nbd_cmd {
struct nbd_device *nbd;
struct mutex lock;
@@ -155,8 +162,6 @@ static struct dentry *nbd_dbg_dir;
#define nbd_name(nbd) ((nbd)->disk->disk_name)
-#define NBD_MAGIC 0x68797548
-
#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
@@ -180,6 +185,17 @@ static void nbd_requeue_cmd(struct nbd_cmd *cmd)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
+ lockdep_assert_held(&cmd->lock);
+
+ /*
+ * Clear INFLIGHT flag so that this cmd won't be completed in
+ * normal completion path
+ *
+ * INFLIGHT flag will be set when the cmd is queued to nbd next
+ * time.
+ */
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+
if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
blk_mq_requeue_request(req, true);
}
@@ -221,7 +237,7 @@ static ssize_t pid_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+ struct nbd_device *nbd = disk->private_data;
return sprintf(buf, "%d\n", nbd->pid);
}
@@ -235,7 +251,7 @@ static ssize_t backend_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+ struct nbd_device *nbd = disk->private_data;
return sprintf(buf, "%s\n", nbd->backend ?: "");
}
@@ -250,7 +266,6 @@ static void nbd_dev_remove(struct nbd_device *nbd)
struct gendisk *disk = nbd->disk;
del_gendisk(disk);
- blk_cleanup_disk(disk);
blk_mq_free_tag_set(&nbd->tag_set);
/*
@@ -261,7 +276,7 @@ static void nbd_dev_remove(struct nbd_device *nbd)
idr_remove(&nbd_index_idr, nbd->index);
mutex_unlock(&nbd_index_mutex);
destroy_workqueue(nbd->recv_workq);
- kfree(nbd);
+ put_disk(disk);
}
static void nbd_dev_remove_work(struct work_struct *work)
@@ -296,7 +311,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
if (args) {
INIT_WORK(&args->work, nbd_dead_link_work);
args->index = nbd->index;
- queue_work(system_wq, &args->work);
+ queue_work(system_percpu_wq, &args->work);
}
}
if (!nsock->dead) {
@@ -316,28 +331,49 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
nsock->sent = 0;
}
-static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
- loff_t blksize)
+static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize)
{
+ struct queue_limits lim;
+ int error;
+
if (!blksize)
blksize = 1u << NBD_DEF_BLKSIZE_BITS;
if (blk_validate_block_size(blksize))
return -EINVAL;
+ if (bytesize < 0)
+ return -EINVAL;
+
nbd->config->bytesize = bytesize;
nbd->config->blksize_bits = __ffs(blksize);
if (!nbd->pid)
return 0;
- if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
- nbd->disk->queue->limits.discard_granularity = blksize;
- nbd->disk->queue->limits.discard_alignment = blksize;
- blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
- }
- blk_queue_logical_block_size(nbd->disk->queue, blksize);
- blk_queue_physical_block_size(nbd->disk->queue, blksize);
+ lim = queue_limits_start_update(nbd->disk->queue);
+ if (nbd->config->flags & NBD_FLAG_SEND_TRIM)
+ lim.max_hw_discard_sectors = UINT_MAX >> SECTOR_SHIFT;
+ else
+ lim.max_hw_discard_sectors = 0;
+ if (!(nbd->config->flags & NBD_FLAG_SEND_FLUSH)) {
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
+ } else if (nbd->config->flags & NBD_FLAG_SEND_FUA) {
+ lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
+ } else {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ lim.features &= ~BLK_FEAT_FUA;
+ }
+ if (nbd->config->flags & NBD_FLAG_ROTATIONAL)
+ lim.features |= BLK_FEAT_ROTATIONAL;
+ if (nbd->config->flags & NBD_FLAG_SEND_WRITE_ZEROES)
+ lim.max_write_zeroes_sectors = UINT_MAX >> SECTOR_SHIFT;
+
+ lim.logical_block_size = blksize;
+ lim.physical_block_size = blksize;
+ error = queue_limits_commit_update_frozen(nbd->disk->queue, &lim);
+ if (error)
+ return error;
if (max_part)
set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
@@ -389,13 +425,30 @@ static u32 req_to_nbd_cmd_type(struct request *req)
return NBD_CMD_WRITE;
case REQ_OP_READ:
return NBD_CMD_READ;
+ case REQ_OP_WRITE_ZEROES:
+ return NBD_CMD_WRITE_ZEROES;
default:
return U32_MAX;
}
}
-static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
- bool reserved)
+static struct nbd_config *nbd_get_config_unlocked(struct nbd_device *nbd)
+{
+ if (refcount_inc_not_zero(&nbd->config_refs)) {
+ /*
+ * Add smp_mb__after_atomic to ensure that reading nbd->config_refs
+ * and reading nbd->config is ordered. The pair is the barrier in
+ * nbd_alloc_and_init_config(), avoid nbd->config_refs is set
+ * before nbd->config.
+ */
+ smp_mb__after_atomic();
+ return nbd->config;
+ }
+
+ return NULL;
+}
+
+static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
struct nbd_device *nbd = cmd->nbd;
@@ -404,17 +457,24 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
- if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ /* partial send is handled in nbd_sock's work function */
+ if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return BLK_EH_RESET_TIMER;
+ }
+
+ if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
mutex_unlock(&cmd->lock);
return BLK_EH_DONE;
}
- if (!refcount_inc_not_zero(&nbd->config_refs)) {
+ config = nbd_get_config_unlocked(nbd);
+ if (!config) {
cmd->status = BLK_STS_TIMEOUT;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
goto done;
}
- config = nbd->config;
if (config->num_connections > 1 ||
(config->num_connections == 1 && nbd->tag_set.timeout)) {
@@ -443,8 +503,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
nbd_mark_nsock_dead(nbd, nsock, 1);
mutex_unlock(&nsock->tx_lock);
}
- mutex_unlock(&cmd->lock);
nbd_requeue_cmd(cmd);
+ mutex_unlock(&cmd->lock);
nbd_config_put(nbd);
return BLK_EH_DONE;
}
@@ -479,6 +539,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
nbd_config_put(nbd);
@@ -487,17 +548,11 @@ done:
return BLK_EH_DONE;
}
-/*
- * Send or receive packet. Return a positive value on success and
- * negtive value on failue, and never return 0.
- */
-static int sock_xmit(struct nbd_device *nbd, int index, int send,
- struct iov_iter *iter, int msg_flags, int *sent)
+static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
+ struct iov_iter *iter, int msg_flags, int *sent)
{
- struct nbd_config *config = nbd->config;
- struct socket *sock = config->socks[index]->sock;
int result;
- struct msghdr msg;
+ struct msghdr msg = {} ;
unsigned int noreclaim_flag;
if (unlikely(!sock)) {
@@ -510,27 +565,27 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
msg.msg_iter = *iter;
noreclaim_flag = memalloc_noreclaim_save();
- do {
- sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = msg_flags | MSG_NOSIGNAL;
-
- if (send)
- result = sock_sendmsg(sock, &msg);
- else
- result = sock_recvmsg(sock, &msg, msg.msg_flags);
-
- if (result <= 0) {
- if (result == 0)
- result = -EPIPE; /* short read */
- break;
- }
- if (sent)
- *sent += result;
- } while (msg_data_left(&msg));
+
+ scoped_with_kernel_creds() {
+ do {
+ sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
+ sock->sk->sk_use_task_frag = false;
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+ if (send)
+ result = sock_sendmsg(sock, &msg);
+ else
+ result = sock_recvmsg(sock, &msg, msg.msg_flags);
+
+ if (result <= 0) {
+ if (result == 0)
+ result = -EPIPE; /* short read */
+ break;
+ }
+ if (sent)
+ *sent += result;
+ } while (msg_data_left(&msg));
+ }
memalloc_noreclaim_restore(noreclaim_flag);
@@ -538,6 +593,19 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
}
/*
+ * Send or receive packet. Return a positive value on success and
+ * negtive value on failure, and never return 0.
+ */
+static int sock_xmit(struct nbd_device *nbd, int index, int send,
+ struct iov_iter *iter, int msg_flags, int *sent)
+{
+ struct nbd_config *config = nbd->config;
+ struct socket *sock = config->socks[index]->sock;
+
+ return __sock_xmit(nbd, sock, send, iter, msg_flags, sent);
+}
+
+/*
* Different settings for sk->sk_sndtimeo can result in different return values
* if there is a signal pending when we enter sendmsg, because reasons?
*/
@@ -546,8 +614,36 @@ static inline int was_interrupted(int result)
return result == -ERESTARTSYS || result == -EINTR;
}
-/* always call with the tx_lock held */
-static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
+/*
+ * We've already sent header or part of data payload, have no choice but
+ * to set pending and schedule it in work.
+ *
+ * And we have to return BLK_STS_OK to block core, otherwise this same
+ * request may be re-dispatched with different tag, but our header has
+ * been sent out with old tag, and this way does confuse reply handling.
+ */
+static void nbd_sched_pending_work(struct nbd_device *nbd,
+ struct nbd_sock *nsock,
+ struct nbd_cmd *cmd, int sent)
+{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+
+ /* pending work should be scheduled only once */
+ WARN_ON_ONCE(test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags));
+
+ nsock->pending = req;
+ nsock->sent = sent;
+ set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+ refcount_inc(&nbd->config_refs);
+ schedule_work(&nsock->work);
+}
+
+/*
+ * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
+ * Returns BLK_STS_IOERR if sending failed.
+ */
+static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
+ int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
struct nbd_config *config = nbd->config;
@@ -556,28 +652,32 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
struct iov_iter from;
- unsigned long size = blk_rq_bytes(req);
struct bio *bio;
u64 handle;
u32 type;
u32 nbd_cmd_flags = 0;
int sent = nsock->sent, skip = 0;
- iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
+ lockdep_assert_held(&cmd->lock);
+ lockdep_assert_held(&nsock->tx_lock);
+
+ iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request));
type = req_to_nbd_cmd_type(req);
if (type == U32_MAX)
- return -EIO;
+ return BLK_STS_IOERR;
if (rq_data_dir(req) == WRITE &&
(config->flags & NBD_FLAG_READ_ONLY)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Write on read-only\n");
- return -EIO;
+ return BLK_STS_IOERR;
}
if (req->cmd_flags & REQ_FUA)
nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+ if ((req->cmd_flags & REQ_NOUNMAP) && (type == NBD_CMD_WRITE_ZEROES))
+ nbd_cmd_flags |= NBD_CMD_FLAG_NO_HOLE;
/* We did a partial send previously, and we at least sent the whole
* request struct, so just go and send the rest of the pages in the
@@ -602,10 +702,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
- request.len = htonl(size);
+ request.len = htonl(blk_rq_bytes(req));
}
handle = nbd_cmd_handle(cmd);
- memcpy(request.handle, &handle, sizeof(handle));
+ request.cookie = cpu_to_be64(handle);
trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
@@ -617,21 +717,21 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
trace_nbd_header_sent(req, handle);
if (result < 0) {
if (was_interrupted(result)) {
- /* If we havne't sent anything we can just return BUSY,
+ /* If we haven't sent anything we can just return BUSY,
* however if we have sent something we need to make
* sure we only allow this req to be sent until we are
* completely done.
*/
if (sent) {
- nsock->pending = req;
- nsock->sent = sent;
+ nbd_sched_pending_work(nbd, nsock, cmd, sent);
+ return BLK_STS_OK;
}
set_bit(NBD_CMD_REQUEUED, &cmd->flags);
return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
- return -EAGAIN;
+ goto requeue;
}
send_pages:
if (type != NBD_CMD_WRITE)
@@ -649,7 +749,7 @@ send_pages:
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
req, bvec.bv_len);
- iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&from, ITER_SOURCE, &bvec, 1, bvec.bv_len);
if (skip) {
if (skip >= iov_iter_count(&from)) {
skip -= iov_iter_count(&from);
@@ -661,19 +761,13 @@ send_pages:
result = sock_xmit(nbd, index, 1, &from, flags, &sent);
if (result < 0) {
if (was_interrupted(result)) {
- /* We've already sent the header, we
- * have no choice but to set pending and
- * return BUSY.
- */
- nsock->pending = req;
- nsock->sent = sent;
- set_bit(NBD_CMD_REQUEUED, &cmd->flags);
- return BLK_STS_RESOURCE;
+ nbd_sched_pending_work(nbd, nsock, cmd, sent);
+ return BLK_STS_OK;
}
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
result);
- return -EAGAIN;
+ goto requeue;
}
/*
* The completion might already have come in,
@@ -690,10 +784,65 @@ out:
trace_nbd_payload_sent(req, handle);
nsock->pending = NULL;
nsock->sent = 0;
- return 0;
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ return BLK_STS_OK;
+
+requeue:
+ /*
+ * Can't requeue in case we are dealing with partial send
+ *
+ * We must run from pending work function.
+ * */
+ if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
+ return BLK_STS_OK;
+
+ /* retry on a different socket */
+ dev_err_ratelimited(disk_to_dev(nbd->disk),
+ "Request send failed, requeueing\n");
+ nbd_mark_nsock_dead(nbd, nsock, 1);
+ nbd_requeue_cmd(cmd);
+ return BLK_STS_OK;
+}
+
+/* handle partial sending */
+static void nbd_pending_cmd_work(struct work_struct *work)
+{
+ struct nbd_sock *nsock = container_of(work, struct nbd_sock, work);
+ struct request *req = nsock->pending;
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ struct nbd_device *nbd = cmd->nbd;
+ unsigned long deadline = READ_ONCE(req->deadline);
+ unsigned int wait_ms = 2;
+
+ mutex_lock(&cmd->lock);
+
+ WARN_ON_ONCE(test_bit(NBD_CMD_REQUEUED, &cmd->flags));
+ if (WARN_ON_ONCE(!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)))
+ goto out;
+
+ mutex_lock(&nsock->tx_lock);
+ while (true) {
+ nbd_send_cmd(nbd, cmd, cmd->index);
+ if (!nsock->pending)
+ break;
+
+ /* don't bother timeout handler for partial sending */
+ if (READ_ONCE(jiffies) + msecs_to_jiffies(wait_ms) >= deadline) {
+ cmd->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
+ break;
+ }
+ msleep(wait_ms);
+ wait_ms *= 2;
+ }
+ mutex_unlock(&nsock->tx_lock);
+ clear_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+out:
+ mutex_unlock(&cmd->lock);
+ nbd_config_put(nbd);
}
-static int nbd_read_reply(struct nbd_device *nbd, int index,
+static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
struct nbd_reply *reply)
{
struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
@@ -701,8 +850,8 @@ static int nbd_read_reply(struct nbd_device *nbd, int index,
int result;
reply->magic = 0;
- iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply));
- result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
+ iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply));
+ result = __sock_xmit(nbd, sock, 0, &to, MSG_WAITALL, NULL);
if (result < 0) {
if (!nbd_disconnected(nbd->config))
dev_err(disk_to_dev(nbd->disk),
@@ -731,7 +880,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
u32 tag;
int ret = 0;
- memcpy(&handle, reply->handle, sizeof(handle));
+ handle = be64_to_cpu(reply->cookie);
tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
@@ -746,7 +895,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
- if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
tag, cmd->status, cmd->flags);
ret = -ENOENT;
@@ -790,7 +939,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
struct iov_iter to;
rq_for_each_segment(bvec, req, iter) {
- iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&to, ITER_DEST, &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result < 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -826,14 +975,14 @@ static void recv_work(struct work_struct *work)
struct nbd_device *nbd = args->nbd;
struct nbd_config *config = nbd->config;
struct request_queue *q = nbd->disk->queue;
- struct nbd_sock *nsock;
+ struct nbd_sock *nsock = args->nsock;
struct nbd_cmd *cmd;
struct request *rq;
while (1) {
struct nbd_reply reply;
- if (nbd_read_reply(nbd, args->index, &reply))
+ if (nbd_read_reply(nbd, nsock->sock, &reply))
break;
/*
@@ -855,23 +1004,30 @@ static void recv_work(struct work_struct *work)
}
rq = blk_mq_rq_from_pdu(cmd);
- if (likely(!blk_should_fake_timeout(rq->q)))
- blk_mq_complete_request(rq);
+ if (likely(!blk_should_fake_timeout(rq->q))) {
+ bool complete;
+
+ mutex_lock(&cmd->lock);
+ complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+ &cmd->flags);
+ mutex_unlock(&cmd->lock);
+ if (complete)
+ blk_mq_complete_request(rq);
+ }
percpu_ref_put(&q->q_usage_counter);
}
- nsock = config->socks[args->index];
mutex_lock(&nsock->tx_lock);
nbd_mark_nsock_dead(nbd, nsock, 1);
mutex_unlock(&nsock->tx_lock);
- nbd_config_put(nbd);
atomic_dec(&config->recv_threads);
wake_up(&config->recv_wq);
+ nbd_config_put(nbd);
kfree(args);
}
-static bool nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
@@ -947,33 +1103,39 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
- if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
+
+ if (!wait_event_timeout(config->conn_wait,
+ test_bit(NBD_RT_DISCONNECTED,
+ &config->runtime_flags) ||
+ atomic_read(&config->live_connections) > 0,
+ config->dead_conn_timeout))
return 0;
- return wait_event_timeout(config->conn_wait,
- atomic_read(&config->live_connections) > 0,
- config->dead_conn_timeout) > 0;
+
+ return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
}
-static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
+static blk_status_t nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
struct nbd_sock *nsock;
- int ret;
+ blk_status_t ret;
+
+ lockdep_assert_held(&cmd->lock);
- if (!refcount_inc_not_zero(&nbd->config_refs)) {
+ config = nbd_get_config_unlocked(nbd);
+ if (!config) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Socks array is empty\n");
- return -EINVAL;
+ return BLK_STS_IOERR;
}
- config = nbd->config;
if (index >= config->num_connections) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n");
nbd_config_put(nbd);
- return -EINVAL;
+ return BLK_STS_IOERR;
}
cmd->status = BLK_STS_OK;
again:
@@ -996,7 +1158,7 @@ again:
*/
sock_shutdown(nbd);
nbd_config_put(nbd);
- return -EIO;
+ return BLK_STS_IOERR;
}
goto again;
}
@@ -1009,27 +1171,10 @@ again:
blk_mq_start_request(req);
if (unlikely(nsock->pending && nsock->pending != req)) {
nbd_requeue_cmd(cmd);
- ret = 0;
+ ret = BLK_STS_OK;
goto out;
}
- /*
- * Some failures are related to the link going down, so anything that
- * returns EAGAIN can be retried on a different socket.
- */
ret = nbd_send_cmd(nbd, cmd, index);
- /*
- * Access to this flag is protected by cmd->lock, thus it's safe to set
- * the flag after nbd_send_cmd() succeed to send request to server.
- */
- if (!ret)
- __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
- else if (ret == -EAGAIN) {
- dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Request send failed, requeueing\n");
- nbd_mark_nsock_dead(nbd, nsock, 1);
- nbd_requeue_cmd(cmd);
- ret = 0;
- }
out:
mutex_unlock(&nsock->tx_lock);
nbd_config_put(nbd);
@@ -1040,7 +1185,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
- int ret;
+ blk_status_t ret;
/*
* Since we look at the bio's to send the request over the network we
@@ -1060,10 +1205,6 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
* appropriate.
*/
ret = nbd_handle_cmd(cmd, hctx->queue_num);
- if (ret < 0)
- ret = BLK_STS_IOERR;
- else if (!ret)
- ret = BLK_STS_OK;
mutex_unlock(&cmd->lock);
return ret;
@@ -1079,6 +1220,14 @@ static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
if (!sock)
return NULL;
+ if (!sk_is_tcp(sock->sk) &&
+ !sk_is_stream_unix(sock->sk)) {
+ dev_err(disk_to_dev(nbd->disk), "Unsupported socket: should be TCP or UNIX.\n");
+ *err = -EINVAL;
+ sockfd_put(sock);
+ return NULL;
+ }
+
if (sock->ops->shutdown == sock_no_shutdown) {
dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
*err = -EINVAL;
@@ -1096,8 +1245,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
struct socket *sock;
struct nbd_sock **socks;
struct nbd_sock *nsock;
+ unsigned int memflags;
int err;
+ /* Arg will be cast to int, check it to avoid overflow */
+ if (arg > INT_MAX)
+ return -EINVAL;
sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
@@ -1106,7 +1259,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
* We need to make sure we don't get any errant requests while we're
* reallocating the ->socks array.
*/
- blk_mq_freeze_queue(nbd->disk->queue);
+ memflags = blk_mq_freeze_queue(nbd->disk->queue);
if (!netlink && !nbd->task_setup &&
!test_bit(NBD_RT_BOUND, &config->runtime_flags))
@@ -1144,14 +1297,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
nsock->pending = NULL;
nsock->sent = 0;
nsock->cookie = 0;
+ INIT_WORK(&nsock->work, nbd_pending_cmd_work);
socks[config->num_connections++] = nsock;
atomic_inc(&config->live_connections);
- blk_mq_unfreeze_queue(nbd->disk->queue);
+ blk_mq_unfreeze_queue(nbd->disk->queue, memflags);
return 0;
put_socket:
- blk_mq_unfreeze_queue(nbd->disk->queue);
+ blk_mq_unfreeze_queue(nbd->disk->queue, memflags);
sockfd_put(sock);
return err;
}
@@ -1197,6 +1351,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
INIT_WORK(&args->work, recv_work);
args->index = i;
args->nbd = nbd;
+ args->nsock = nsock;
nsock->cookie++;
mutex_unlock(&nsock->tx_lock);
sockfd_put(old);
@@ -1217,30 +1372,19 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
return -ENOSPC;
}
-static void nbd_bdev_reset(struct block_device *bdev)
+static void nbd_bdev_reset(struct nbd_device *nbd)
{
- if (bdev->bd_openers > 1)
+ if (disk_openers(nbd->disk) > 1)
return;
- set_capacity(bdev->bd_disk, 0);
+ set_capacity(nbd->disk, 0);
}
static void nbd_parse_flags(struct nbd_device *nbd)
{
- struct nbd_config *config = nbd->config;
- if (config->flags & NBD_FLAG_READ_ONLY)
+ if (nbd->config->flags & NBD_FLAG_READ_ONLY)
set_disk_ro(nbd->disk, true);
else
set_disk_ro(nbd->disk, false);
- if (config->flags & NBD_FLAG_SEND_TRIM)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
- if (config->flags & NBD_FLAG_SEND_FLUSH) {
- if (config->flags & NBD_FLAG_SEND_FUA)
- blk_queue_write_cache(nbd->disk->queue, true, true);
- else
- blk_queue_write_cache(nbd->disk->queue, true, false);
- }
- else
- blk_queue_write_cache(nbd->disk->queue, false, false);
}
static void send_disconnects(struct nbd_device *nbd)
@@ -1257,7 +1401,7 @@ static void send_disconnects(struct nbd_device *nbd)
for (i = 0; i < config->num_connections; i++) {
struct nbd_sock *nsock = config->socks[i];
- iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request));
mutex_lock(&nsock->tx_lock);
ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
if (ret < 0)
@@ -1317,10 +1461,6 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->config = NULL;
nbd->tag_set.timeout = 0;
- nbd->disk->queue->limits.discard_granularity = 0;
- nbd->disk->queue->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
@@ -1344,7 +1484,17 @@ static int nbd_start_device(struct nbd_device *nbd)
return -EINVAL;
}
- blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
+retry:
+ mutex_unlock(&nbd->config_lock);
+ blk_mq_update_nr_hw_queues(&nbd->tag_set, num_connections);
+ mutex_lock(&nbd->config_lock);
+
+ /* if another code path updated nr_hw_queues, retry until succeed */
+ if (num_connections != config->num_connections) {
+ num_connections = config->num_connections;
+ goto retry;
+ }
+
nbd->pid = task_pid_nr(current);
nbd_parse_flags(nbd);
@@ -1383,13 +1533,14 @@ static int nbd_start_device(struct nbd_device *nbd)
refcount_inc(&nbd->config_refs);
INIT_WORK(&args->work, recv_work);
args->nbd = nbd;
+ args->nsock = config->socks[i];
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
-static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device_ioctl(struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
int ret;
@@ -1403,12 +1554,14 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0);
- if (ret)
+ if (ret) {
sock_shutdown(nbd);
- flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ }
+ flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
- nbd_bdev_reset(bdev);
+ nbd_bdev_reset(nbd);
/* user requested, ignore socket errors */
if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
@@ -1417,12 +1570,11 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
return ret;
}
-static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
- struct block_device *bdev)
+static void nbd_clear_sock_ioctl(struct nbd_device *nbd)
{
- sock_shutdown(nbd);
- __invalidate_device(bdev, true);
- nbd_bdev_reset(bdev);
+ nbd_clear_sock(nbd);
+ disk_force_media_change(nbd->disk);
+ nbd_bdev_reset(nbd);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
@@ -1448,7 +1600,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_DISCONNECT:
return nbd_disconnect(nbd);
case NBD_CLEAR_SOCK:
- nbd_clear_sock_ioctl(nbd, bdev);
+ nbd_clear_sock_ioctl(nbd);
return 0;
case NBD_SET_SOCK:
return nbd_add_socket(nbd, arg, false);
@@ -1468,7 +1620,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
config->flags = arg;
return 0;
case NBD_DO_IT:
- return nbd_start_device_ioctl(nbd, bdev);
+ return nbd_start_device_ioctl(nbd);
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
@@ -1485,7 +1637,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
return -ENOTTY;
}
-static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
+static int nbd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nbd_device *nbd = bdev->bd_disk->private_data;
@@ -1515,29 +1667,49 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
return error;
}
-static struct nbd_config *nbd_alloc_config(void)
+static int nbd_alloc_and_init_config(struct nbd_device *nbd)
{
struct nbd_config *config;
+ if (WARN_ON(nbd->config))
+ return -EINVAL;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
- if (!config)
- return NULL;
+ if (!config) {
+ module_put(THIS_MODULE);
+ return -ENOMEM;
+ }
+
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
- try_module_get(THIS_MODULE);
- return config;
+
+ nbd->config = config;
+ /*
+ * Order refcount_set(&nbd->config_refs, 1) and nbd->config assignment,
+ * its pair is the barrier in nbd_get_config_unlocked().
+ * So nbd_get_config_unlocked() won't see nbd->config as null after
+ * refcount_inc_not_zero() succeed.
+ */
+ smp_mb__before_atomic();
+ refcount_set(&nbd->config_refs, 1);
+
+ return 0;
}
-static int nbd_open(struct block_device *bdev, fmode_t mode)
+static int nbd_open(struct gendisk *disk, blk_mode_t mode)
{
struct nbd_device *nbd;
+ struct nbd_config *config;
int ret = 0;
mutex_lock(&nbd_index_mutex);
- nbd = bdev->bd_disk->private_data;
+ nbd = disk->private_data;
if (!nbd) {
ret = -ENXIO;
goto out;
@@ -1546,46 +1718,52 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
ret = -ENXIO;
goto out;
}
- if (!refcount_inc_not_zero(&nbd->config_refs)) {
- struct nbd_config *config;
+ config = nbd_get_config_unlocked(nbd);
+ if (!config) {
mutex_lock(&nbd->config_lock);
if (refcount_inc_not_zero(&nbd->config_refs)) {
mutex_unlock(&nbd->config_lock);
goto out;
}
- config = nbd->config = nbd_alloc_config();
- if (!config) {
- ret = -ENOMEM;
+ ret = nbd_alloc_and_init_config(nbd);
+ if (ret) {
mutex_unlock(&nbd->config_lock);
goto out;
}
- refcount_set(&nbd->config_refs, 1);
+
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
if (max_part)
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
- } else if (nbd_disconnected(nbd->config)) {
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+ } else if (nbd_disconnected(config)) {
if (max_part)
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
}
out:
mutex_unlock(&nbd_index_mutex);
return ret;
}
-static void nbd_release(struct gendisk *disk, fmode_t mode)
+static void nbd_release(struct gendisk *disk)
{
struct nbd_device *nbd = disk->private_data;
if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
- disk->part0->bd_openers == 0)
+ disk_openers(disk) == 0)
nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
nbd_put(nbd);
}
+static void nbd_free_disk(struct gendisk *disk)
+{
+ struct nbd_device *nbd = disk->private_data;
+
+ kfree(nbd);
+}
+
static const struct block_device_operations nbd_fops =
{
.owner = THIS_MODULE,
@@ -1593,6 +1771,7 @@ static const struct block_device_operations nbd_fops =
.release = nbd_release,
.ioctl = nbd_ioctl,
.compat_ioctl = nbd_ioctl,
+ .free_disk = nbd_free_disk,
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -1628,6 +1807,10 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
seq_puts(s, "NBD_FLAG_SEND_FUA\n");
if (flags & NBD_FLAG_SEND_TRIM)
seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
+ if (flags & NBD_FLAG_SEND_WRITE_ZEROES)
+ seq_puts(s, "NBD_FLAG_SEND_WRITE_ZEROES\n");
+ if (flags & NBD_FLAG_ROTATIONAL)
+ seq_puts(s, "NBD_FLAG_ROTATIONAL\n");
return 0;
}
@@ -1643,7 +1826,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
return -EIO;
dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
- if (!dir) {
+ if (IS_ERR(dir)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
nbd_name(nbd));
return -EIO;
@@ -1669,7 +1852,7 @@ static int nbd_dbg_init(void)
struct dentry *dbg_dir;
dbg_dir = debugfs_create_dir("nbd", NULL);
- if (!dbg_dir)
+ if (IS_ERR(dbg_dir))
return -EIO;
nbd_dbg_dir = dbg_dir;
@@ -1723,6 +1906,12 @@ static const struct blk_mq_ops nbd_mq_ops = {
static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
+ struct queue_limits lim = {
+ .max_hw_sectors = 65536,
+ .io_opt = 256 << SECTOR_SHIFT,
+ .max_segments = USHRT_MAX,
+ .max_segment_size = UINT_MAX,
+ };
struct nbd_device *nbd;
struct gendisk *disk;
int err = -ENOMEM;
@@ -1736,8 +1925,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
nbd->tag_set.queue_depth = 128;
nbd->tag_set.numa_node = NUMA_NO_NODE;
nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
- nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
- BLK_MQ_F_BLOCKING;
+ nbd->tag_set.flags = BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
nbd->backend = NULL;
@@ -1753,7 +1941,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
if (err == -ENOSPC)
err = -EEXIST;
} else {
- err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
+ err = idr_alloc(&nbd_index_idr, nbd, 0,
+ (MINORMASK >> part_shift) + 1, GFP_KERNEL);
if (err >= 0)
index = err;
}
@@ -1762,7 +1951,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
if (err < 0)
goto out_free_tags;
- disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
+ disk = blk_mq_alloc_disk(&nbd->tag_set, &lim, NULL);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_free_idr;
@@ -1778,19 +1967,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
goto out_err_disk;
}
- /*
- * Tell the block layer that we are not a rotational device
- */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
- disk->queue->limits.discard_granularity = 0;
- disk->queue->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(disk->queue, 0);
- blk_queue_max_segment_size(disk->queue, UINT_MAX);
- blk_queue_max_segments(disk->queue, USHRT_MAX);
- blk_queue_max_hw_sectors(disk->queue, 65536);
- disk->queue->limits.max_sectors = 256;
-
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
/*
@@ -1800,17 +1976,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
refcount_set(&nbd->refs, 0);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;
-
- /* Too big first_minor can cause duplicate creation of
- * sysfs files/links, since index << part_shift might overflow, or
- * MKDEV() expect that the max bits of first_minor is 20.
- */
disk->first_minor = index << part_shift;
- if (disk->first_minor < index || disk->first_minor > MINORMASK) {
- err = -EINVAL;
- goto out_free_work;
- }
-
disk->minors = 1 << part_shift;
disk->fops = &nbd_fops;
disk->private_data = nbd;
@@ -1829,7 +1995,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
out_free_work:
destroy_workqueue(nbd->recv_workq);
out_err_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_free_idr:
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
@@ -1915,14 +2081,25 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- if (info->attrs[NBD_ATTR_INDEX])
+ if (info->attrs[NBD_ATTR_INDEX]) {
index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
- if (!info->attrs[NBD_ATTR_SOCKETS]) {
- printk(KERN_ERR "nbd: must specify at least one socket\n");
+
+ /*
+ * Too big first_minor can cause duplicate creation of
+ * sysfs files/links, since index << part_shift might overflow, or
+ * MKDEV() expect that the max bits of first_minor is 20.
+ */
+ if (index < 0 || index > MINORMASK >> part_shift) {
+ pr_err("illegal input index %d\n", index);
+ return -EINVAL;
+ }
+ }
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SOCKETS)) {
+ pr_err("must specify at least one socket\n");
return -EINVAL;
}
- if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
- printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SIZE_BYTES)) {
+ pr_err("must specify a size in bytes for the device\n");
return -EINVAL;
}
again:
@@ -1936,7 +2113,7 @@ again:
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- pr_err("nbd: device at index %d is going down\n",
+ pr_err("device at index %d is going down\n",
index);
return -EINVAL;
}
@@ -1947,7 +2124,7 @@ again:
if (!nbd) {
nbd = nbd_dev_add(index, 2);
if (IS_ERR(nbd)) {
- pr_err("nbd: failed to add new device\n");
+ pr_err("failed to add new device\n");
return PTR_ERR(nbd);
}
}
@@ -1958,24 +2135,20 @@ again:
nbd_put(nbd);
if (index == -1)
goto again;
- printk(KERN_ERR "nbd: nbd%d already in use\n", index);
+ pr_err("nbd%d already in use\n", index);
return -EBUSY;
}
- if (WARN_ON(nbd->config)) {
- mutex_unlock(&nbd->config_lock);
- nbd_put(nbd);
- return -EINVAL;
- }
- config = nbd->config = nbd_alloc_config();
- if (!nbd->config) {
+
+ ret = nbd_alloc_and_init_config(nbd);
+ if (ret) {
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
- printk(KERN_ERR "nbd: couldn't allocate config\n");
- return -ENOMEM;
+ pr_err("couldn't allocate config\n");
+ return ret;
}
- refcount_set(&nbd->config_refs, 1);
- set_bit(NBD_RT_BOUND, &config->runtime_flags);
+ config = nbd->config;
+ set_bit(NBD_RT_BOUND, &config->runtime_flags);
ret = nbd_genl_size_set(info, nbd);
if (ret)
goto out;
@@ -2025,7 +2198,7 @@ again:
struct nlattr *socks[NBD_SOCK_MAX+1];
if (nla_type(attr) != NBD_SOCK_ITEM) {
- printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+ pr_err("socks must be embedded in a SOCK_ITEM attr\n");
ret = -EINVAL;
goto out;
}
@@ -2034,7 +2207,7 @@ again:
nbd_sock_policy,
info->extack);
if (ret != 0) {
- printk(KERN_ERR "nbd: error processing sock list\n");
+ pr_err("error processing sock list\n");
ret = -EINVAL;
goto out;
}
@@ -2046,9 +2219,7 @@ again:
goto out;
}
}
- ret = nbd_start_device(nbd);
- if (ret)
- goto out;
+
if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
GFP_KERNEL);
@@ -2064,13 +2235,16 @@ again:
goto out;
}
set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
+
+ ret = nbd_start_device(nbd);
out:
- mutex_unlock(&nbd->config_lock);
if (!ret) {
set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
refcount_inc(&nbd->config_refs);
nbd_connect_reply(info, nbd->index);
}
+ mutex_unlock(&nbd->config_lock);
+
nbd_config_put(nbd);
if (put_dev)
nbd_put(nbd);
@@ -2082,6 +2256,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
sock_shutdown(nbd);
+ wake_up(&nbd->config->conn_wait);
/*
* Make sure recv thread has finished, we can safely call nbd_clear_que()
* to cancel the inflight I/Os.
@@ -2089,6 +2264,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
flush_workqueue(nbd->recv_workq);
nbd_clear_que(nbd);
nbd->task_setup = NULL;
+ clear_bit(NBD_RT_BOUND, &nbd->config->runtime_flags);
mutex_unlock(&nbd->config_lock);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
@@ -2104,8 +2280,8 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- if (!info->attrs[NBD_ATTR_INDEX]) {
- printk(KERN_ERR "nbd: must specify an index to disconnect\n");
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) {
+ pr_err("must specify an index to disconnect\n");
return -EINVAL;
}
index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
@@ -2113,14 +2289,12 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
nbd = idr_find(&nbd_index_idr, index);
if (!nbd) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: couldn't find device at index %d\n",
- index);
+ pr_err("couldn't find device at index %d\n", index);
return -EINVAL;
}
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: device at index %d is going down\n",
- index);
+ pr_err("device at index %d is going down\n", index);
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);
@@ -2144,8 +2318,8 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- if (!info->attrs[NBD_ATTR_INDEX]) {
- printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) {
+ pr_err("must specify a device to reconfigure\n");
return -EINVAL;
}
index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
@@ -2153,8 +2327,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
nbd = idr_find(&nbd_index_idr, index);
if (!nbd) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
- index);
+ pr_err("couldn't find a device at index %d\n", index);
return -EINVAL;
}
if (nbd->backend) {
@@ -2175,13 +2348,13 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
}
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: device at index %d is going down\n",
- index);
+ pr_err("device at index %d is going down\n", index);
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);
- if (!refcount_inc_not_zero(&nbd->config_refs)) {
+ config = nbd_get_config_unlocked(nbd);
+ if (!config) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
nbd_put(nbd);
@@ -2189,7 +2362,6 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
}
mutex_lock(&nbd->config_lock);
- config = nbd->config;
if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
!nbd->pid) {
dev_err(nbd_to_dev(nbd),
@@ -2240,7 +2412,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
struct nlattr *socks[NBD_SOCK_MAX+1];
if (nla_type(attr) != NBD_SOCK_ITEM) {
- printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+ pr_err("socks must be embedded in a SOCK_ITEM attr\n");
ret = -EINVAL;
goto out;
}
@@ -2249,7 +2421,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
nbd_sock_policy,
info->extack);
if (ret != 0) {
- printk(KERN_ERR "nbd: error processing sock list\n");
+ pr_err("error processing sock list\n");
ret = -EINVAL;
goto out;
}
@@ -2308,11 +2480,14 @@ static struct genl_family nbd_genl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = nbd_connect_genl_ops,
.n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
+ .resv_start_op = NBD_CMD_STATUS + 1,
.maxattr = NBD_ATTR_MAX,
+ .netnsok = 1,
.policy = nbd_attr_policy,
.mcgrps = nbd_mcast_grps,
.n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
};
+MODULE_ALIAS_GENL_FAMILY(NBD_GENL_FAMILY_NAME);
static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
{
@@ -2378,6 +2553,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
}
dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
+ if (!dev_list) {
+ nlmsg_free(reply);
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
if (index == -1) {
ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
if (ret) {
@@ -2466,7 +2647,7 @@ static int __init nbd_init(void)
BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
if (max_part < 0) {
- printk(KERN_ERR "nbd: max_part must be >= 0\n");
+ pr_err("max_part must be >= 0\n");
return -EINVAL;
}
@@ -2529,6 +2710,12 @@ static void __exit nbd_cleanup(void)
struct nbd_device *nbd;
LIST_HEAD(del_list);
+ /*
+ * Unregister netlink interface prior to waiting
+ * for the completion of netlink commands.
+ */
+ genl_unregister_family(&nbd_genl_family);
+
nbd_dbg_close();
mutex_lock(&nbd_index_mutex);
@@ -2538,8 +2725,11 @@ static void __exit nbd_cleanup(void)
while (!list_empty(&del_list)) {
nbd = list_first_entry(&del_list, struct nbd_device, list);
list_del_init(&nbd->list);
+ if (refcount_read(&nbd->config_refs))
+ pr_err("possibly leaking nbd_config (ref %d)\n",
+ refcount_read(&nbd->config_refs));
if (refcount_read(&nbd->refs) != 1)
- printk(KERN_ERR "nbd: possibly leaking a device\n");
+ pr_err("possibly leaking a device\n");
nbd_put(nbd);
}
@@ -2547,7 +2737,6 @@ static void __exit nbd_cleanup(void)
destroy_workqueue(nbd_del_wq);
idr_destroy(&nbd_index_idr);
- genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
}