summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig6
-rw-r--r--drivers/block/drbd/drbd_main.c6
-rw-r--r--drivers/block/drbd/drbd_receiver.c6
-rw-r--r--drivers/block/loop.c12
-rw-r--r--drivers/block/nbd.c10
-rw-r--r--drivers/block/rbd.c4
-rw-r--r--drivers/block/ublk_drv.c127
7 files changed, 97 insertions, 74 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 91de9fc29bbe..a2184b428493 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -365,6 +365,12 @@ config BLK_DEV_UBLK
definition isn't finalized yet, and might change according to future
requirement, so mark is as experimental now.
+ Say Y if you want to get better performance because task_work_add()
+ can be used in IO path for replacing io_uring cmd, which will become
+ shared between IO tasks and ubq daemon, meantime task_work_add() can
+ can handle batch more effectively, but task_work_add() isn't exported
+ for module, so ublk has to be built to kernel.
+
source "drivers/block/rnbd/Kconfig"
endif # BLK_DEV
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7213ffd69a16..2f16e1bfb6e7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1816,7 +1816,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
/* THINK if (signal_pending) return ... ? */
- iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, size);
if (sock == connection->data.socket) {
rcu_read_lock();
@@ -2671,7 +2671,7 @@ static int init_submitter(struct drbd_device *device)
enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
{
struct drbd_resource *resource = adm_ctx->resource;
- struct drbd_connection *connection;
+ struct drbd_connection *connection, *n;
struct drbd_device *device;
struct drbd_peer_device *peer_device, *tmp_peer_device;
struct gendisk *disk;
@@ -2790,7 +2790,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
out_destroy_workqueue:
destroy_workqueue(device->submit.wq);
out_idr_remove_from_resource:
- for_each_connection(connection, resource) {
+ for_each_connection_safe(connection, n, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
if (peer_device)
kref_put(&connection->kref, drbd_destroy_connection);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 24d0a2262982..0e58a3187345 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -507,7 +507,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
struct msghdr msg = {
.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
};
- iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size);
return sock_recvmsg(sock, &msg, msg.msg_flags);
}
@@ -781,7 +781,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_connection *connection,
timeo = connect_int * HZ;
/* 28.5% random jitter */
- timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7;
+ timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
if (err <= 0)
@@ -1004,7 +1004,7 @@ retry:
drbd_warn(connection, "Error receiving initial packet\n");
sock_release(s);
randomize:
- if (prandom_u32_max(2))
+ if (get_random_u32_below(2))
goto retry;
}
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ad92192c7d61..1f8f3b87bdfa 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -243,7 +243,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
struct iov_iter i;
ssize_t bw;
- iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
+ iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0);
@@ -286,7 +286,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
ssize_t len;
rq_for_each_segment(bvec, rq, iter) {
- iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len);
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0)
return len;
@@ -392,7 +392,7 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret)
}
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
- loff_t pos, bool rw)
+ loff_t pos, int rw)
{
struct iov_iter iter;
struct req_iterator rq_iter;
@@ -448,7 +448,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
- if (rw == WRITE)
+ if (rw == ITER_SOURCE)
ret = call_write_iter(file, &cmd->iocb, &iter);
else
ret = call_read_iter(file, &cmd->iocb, &iter);
@@ -490,12 +490,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
case REQ_OP_WRITE:
if (cmd->use_aio)
- return lo_rw_aio(lo, cmd, pos, WRITE);
+ return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
else
return lo_write_simple(lo, rq, pos);
case REQ_OP_READ:
if (cmd->use_aio)
- return lo_rw_aio(lo, cmd, pos, READ);
+ return lo_rw_aio(lo, cmd, pos, ITER_DEST);
else
return lo_read_simple(lo, rq, pos);
default:
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5cffd96ef2d7..e379ccc63c52 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -563,7 +563,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
u32 nbd_cmd_flags = 0;
int sent = nsock->sent, skip = 0;
- iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request));
type = req_to_nbd_cmd_type(req);
if (type == U32_MAX)
@@ -649,7 +649,7 @@ send_pages:
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
req, bvec.bv_len);
- iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&from, ITER_SOURCE, &bvec, 1, bvec.bv_len);
if (skip) {
if (skip >= iov_iter_count(&from)) {
skip -= iov_iter_count(&from);
@@ -701,7 +701,7 @@ static int nbd_read_reply(struct nbd_device *nbd, int index,
int result;
reply->magic = 0;
- iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply));
+ iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply));
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result < 0) {
if (!nbd_disconnected(nbd->config))
@@ -790,7 +790,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
struct iov_iter to;
rq_for_each_segment(bvec, req, iter) {
- iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&to, ITER_DEST, &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result < 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -1267,7 +1267,7 @@ static void send_disconnects(struct nbd_device *nbd)
for (i = 0; i < config->num_connections; i++) {
struct nbd_sock *nsock = config->socks[i];
- iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request));
mutex_lock(&nsock->tx_lock);
ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
if (ret < 0)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index f9e39301c4af..04453f4a319c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7222,8 +7222,10 @@ static int __init rbd_sysfs_init(void)
int ret;
ret = device_register(&rbd_root_dev);
- if (ret < 0)
+ if (ret < 0) {
+ put_device(&rbd_root_dev);
return ret;
+ }
ret = bus_register(&rbd_bus_type);
if (ret < 0)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 5afce6ffaadf..e9de9d846b73 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -57,11 +57,12 @@
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
struct ublk_rq_data {
+ struct llist_node node;
struct callback_head work;
};
struct ublk_uring_cmd_pdu {
- struct request *req;
+ struct ublk_queue *ubq;
};
/*
@@ -119,6 +120,8 @@ struct ublk_queue {
struct task_struct *ubq_daemon;
char *io_cmd_buf;
+ struct llist_head io_cmds;
+
unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz;
bool force_abort;
@@ -761,11 +764,31 @@ static inline void __ublk_rq_task_work(struct request *req)
ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
}
+static inline void ublk_forward_io_cmds(struct ublk_queue *ubq)
+{
+ struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+ struct ublk_rq_data *data, *tmp;
+
+ io_cmds = llist_reverse_order(io_cmds);
+ llist_for_each_entry_safe(data, tmp, io_cmds, node)
+ __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
+}
+
+static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
+{
+ struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+ struct ublk_rq_data *data, *tmp;
+
+ llist_for_each_entry_safe(data, tmp, io_cmds, node)
+ __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
+}
+
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
{
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+ struct ublk_queue *ubq = pdu->ubq;
- __ublk_rq_task_work(pdu->req);
+ ublk_forward_io_cmds(ubq);
}
static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -773,8 +796,45 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
struct ublk_rq_data *data = container_of(work,
struct ublk_rq_data, work);
struct request *req = blk_mq_rq_from_pdu(data);
+ struct ublk_queue *ubq = req->mq_hctx->driver_data;
+
+ ublk_forward_io_cmds(ubq);
+}
- __ublk_rq_task_work(req);
+static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
+{
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
+ struct ublk_io *io;
+
+ if (!llist_add(&data->node, &ubq->io_cmds))
+ return;
+
+ io = &ubq->ios[rq->tag];
+ /*
+ * If the check pass, we know that this is a re-issued request aborted
+ * previously in monitor_work because the ubq_daemon(cmd's task) is
+ * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
+ * because this ioucmd's io_uring context may be freed now if no inflight
+ * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
+ *
+ * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+ * the tag). Then the request is re-started(allocating the tag) and we are here.
+ * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
+ * guarantees that here is a re-issued request aborted previously.
+ */
+ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
+ ublk_abort_io_cmds(ubq);
+ } else if (ublk_can_use_task_work(ubq)) {
+ if (task_work_add(ubq->ubq_daemon, &data->work,
+ TWA_SIGNAL_NO_IPI))
+ ublk_abort_io_cmds(ubq);
+ } else {
+ struct io_uring_cmd *cmd = io->cmd;
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->ubq = ubq;
+ io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+ }
}
static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -788,6 +848,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
res = ublk_setup_iod(ubq, rq);
if (unlikely(res != BLK_STS_OK))
return BLK_STS_IOERR;
+
/* With recovery feature enabled, force_abort is set in
* ublk_stop_dev() before calling del_gendisk(). We have to
* abort all requeued and new rqs here to let del_gendisk()
@@ -803,53 +864,15 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
if (unlikely(ubq_daemon_is_dying(ubq))) {
- fail:
__ublk_abort_rq(ubq, rq);
return BLK_STS_OK;
}
- if (ublk_can_use_task_work(ubq)) {
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
- enum task_work_notify_mode notify_mode = bd->last ?
- TWA_SIGNAL_NO_IPI : TWA_NONE;
-
- if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
- goto fail;
- } else {
- struct ublk_io *io = &ubq->ios[rq->tag];
- struct io_uring_cmd *cmd = io->cmd;
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
- /*
- * If the check pass, we know that this is a re-issued request aborted
- * previously in monitor_work because the ubq_daemon(cmd's task) is
- * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
- * because this ioucmd's io_uring context may be freed now if no inflight
- * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
- *
- * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
- * the tag). Then the request is re-started(allocating the tag) and we are here.
- * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
- * guarantees that here is a re-issued request aborted previously.
- */
- if ((io->flags & UBLK_IO_FLAG_ABORTED))
- goto fail;
-
- pdu->req = rq;
- io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
- }
+ ublk_queue_cmd(ubq, rq);
return BLK_STS_OK;
}
-static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx)
-{
- struct ublk_queue *ubq = hctx->driver_data;
-
- if (ublk_can_use_task_work(ubq))
- __set_notify_signal(ubq->ubq_daemon);
-}
-
static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
unsigned int hctx_idx)
{
@@ -871,7 +894,6 @@ static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
static const struct blk_mq_ops ublk_mq_ops = {
.queue_rq = ublk_queue_rq,
- .commit_rqs = ublk_commit_rqs,
.init_hctx = ublk_init_hctx,
.init_request = ublk_init_rq,
};
@@ -1164,22 +1186,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
}
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
- int tag, struct io_uring_cmd *cmd)
+ int tag)
{
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
- if (ublk_can_use_task_work(ubq)) {
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
- /* should not fail since we call it just in ubq->ubq_daemon */
- task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
- } else {
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
- pdu->req = req;
- io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
- }
+ ublk_queue_cmd(ubq, req);
}
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -1267,7 +1279,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
io->addr = ub_cmd->addr;
io->cmd = cmd;
io->flags |= UBLK_IO_FLAG_ACTIVE;
- ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+ ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
break;
default:
goto out;
@@ -1658,6 +1670,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
*/
ub->dev_info.flags &= UBLK_F_ALL;
+ if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
+ ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;