summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/aoe/aoe.h1
-rw-r--r--drivers/block/aoe/aoecmd.c10
-rw-r--r--drivers/block/aoe/aoedev.c15
-rw-r--r--drivers/block/brd.c6
-rw-r--r--drivers/block/drbd/drbd_main.c3
-rw-r--r--drivers/block/drbd/drbd_req.c3
-rw-r--r--drivers/block/drbd/drbd_worker.c6
-rw-r--r--drivers/block/loop.c18
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c7
-rw-r--r--drivers/block/nbd.c6
-rw-r--r--drivers/block/swim3.c8
-rw-r--r--drivers/block/ublk_drv.c166
12 files changed, 150 insertions, 99 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 749ae1246f4c..d35caa3c69e1 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -80,6 +80,7 @@ enum {
DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */
DEVFL_FREED = (1<<8), /* device has been cleaned up */
+ DEVFL_DEAD = (1<<9), /* device has timed out of aoe_deadsecs */
};
enum {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 92b06d1de4cc..6298f8e271e3 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -745,7 +745,7 @@ rexmit_timer(struct timer_list *timer)
int utgts; /* number of aoetgt descriptors (not slots) */
int since;
- d = from_timer(d, timer, timer);
+ d = timer_container_of(d, timer, timer);
spin_lock_irqsave(&d->lock, flags);
@@ -754,7 +754,7 @@ rexmit_timer(struct timer_list *timer)
utgts = count_targets(d, NULL);
- if (d->flags & DEVFL_TKILL) {
+ if (d->flags & (DEVFL_TKILL | DEVFL_DEAD)) {
spin_unlock_irqrestore(&d->lock, flags);
return;
}
@@ -786,7 +786,8 @@ rexmit_timer(struct timer_list *timer)
* to clean up.
*/
list_splice(&flist, &d->factive[0]);
- aoedev_downdev(d);
+ d->flags |= DEVFL_DEAD;
+ queue_work(aoe_wq, &d->work);
goto out;
}
@@ -898,6 +899,9 @@ aoecmd_sleepwork(struct work_struct *work)
{
struct aoedev *d = container_of(work, struct aoedev, work);
+ if (d->flags & DEVFL_DEAD)
+ aoedev_downdev(d);
+
if (d->flags & DEVFL_GDALLOC)
aoeblk_gdalloc(d);
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 141b2a0e03f2..3a240755045b 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -149,7 +149,7 @@ dummy_timer(struct timer_list *t)
{
struct aoedev *d;
- d = from_timer(d, t, timer);
+ d = timer_container_of(d, t, timer);
if (d->flags & DEVFL_TKILL)
return;
d->timer.expires = jiffies + HZ;
@@ -198,9 +198,13 @@ aoedev_downdev(struct aoedev *d)
{
struct aoetgt *t, **tt, **te;
struct list_head *head, *pos, *nx;
+ struct request *rq, *rqnext;
int i;
+ unsigned long flags;
- d->flags &= ~DEVFL_UP;
+ spin_lock_irqsave(&d->lock, flags);
+ d->flags &= ~(DEVFL_UP | DEVFL_DEAD);
+ spin_unlock_irqrestore(&d->lock, flags);
/* clean out active and to-be-retransmitted buffers */
for (i = 0; i < NFACTIVE; i++) {
@@ -223,6 +227,13 @@ aoedev_downdev(struct aoedev *d)
/* clean out the in-process request (if any) */
aoe_failip(d);
+ /* clean out any queued block requests */
+ list_for_each_entry_safe(rq, rqnext, &d->rq_list, queuelist) {
+ list_del_init(&rq->queuelist);
+ blk_mq_start_request(rq);
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ }
+
/* fast fail all pending I/O */
if (d->blkq) {
/* UP is cleared, freeze+quiesce to insure all are errored */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b1be6c510372..0c2eabe14af3 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -64,13 +64,15 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
- rcu_read_lock();
- if (!page)
+ if (!page) {
+ rcu_read_lock();
return ERR_PTR(-ENOMEM);
+ }
xa_lock(&brd->brd_pages);
ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
page, gfp);
+ rcu_read_lock();
if (ret) {
xa_unlock(&brd->brd_pages);
__free_page(page);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ced2cc5f46f2..52724b79be30 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3591,7 +3591,8 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
static void md_sync_timer_fn(struct timer_list *t)
{
- struct drbd_device *device = from_timer(device, t, md_sync_timer);
+ struct drbd_device *device = timer_container_of(device, t,
+ md_sync_timer);
drbd_device_post_work(device, MD_SYNC);
}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 380e6584a4ee..d15826f6ee81 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1699,7 +1699,8 @@ static bool net_timeout_reached(struct drbd_request *net_req,
void request_timer_fn(struct timer_list *t)
{
- struct drbd_device *device = from_timer(device, t, request_timer);
+ struct drbd_device *device = timer_container_of(device, t,
+ request_timer);
struct drbd_connection *connection = first_peer_device(device)->connection;
struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
struct net_conf *nc;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 4352a50fbb3f..a6ea737b3b71 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -442,7 +442,8 @@ int w_resync_timer(struct drbd_work *w, int cancel)
void resync_timer_fn(struct timer_list *t)
{
- struct drbd_device *device = from_timer(device, t, resync_timer);
+ struct drbd_device *device = timer_container_of(device, t,
+ resync_timer);
drbd_queue_work_if_unqueued(
&first_peer_device(device)->connection->sender_work,
@@ -1698,7 +1699,8 @@ void drbd_rs_controller_reset(struct drbd_peer_device *peer_device)
void start_resync_timer_fn(struct timer_list *t)
{
- struct drbd_device *device = from_timer(device, t, start_resync_timer);
+ struct drbd_device *device = timer_container_of(device, t,
+ start_resync_timer);
drbd_device_post_work(device, RS_START);
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e2b1f377f585..8d994cae3b83 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -313,6 +313,8 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
return;
kfree(cmd->bvec);
cmd->bvec = NULL;
+ if (req_op(rq) == REQ_OP_WRITE)
+ kiocb_end_write(&cmd->iocb);
if (likely(!blk_should_fake_timeout(rq->q)))
blk_mq_complete_request(rq);
}
@@ -387,9 +389,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_flags = 0;
}
- if (rw == ITER_SOURCE)
+ if (rw == ITER_SOURCE) {
+ kiocb_start_write(&cmd->iocb);
ret = file->f_op->write_iter(&cmd->iocb, &iter);
- else
+ } else
ret = file->f_op->read_iter(&cmd->iocb, &iter);
lo_rw_aio_do_completion(cmd);
@@ -1244,12 +1247,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS);
- if (size_changed) {
- loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
- lo->lo_backing_file);
- loop_set_size(lo, new_size);
- }
-
/* update the direct I/O flag if lo_offset changed */
loop_update_dio(lo);
@@ -1257,6 +1254,11 @@ out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue, memflags);
if (partscan)
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
+ if (!err && size_changed) {
+ loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_backing_file);
+ loop_set_size(lo, new_size);
+ }
out_unlock:
mutex_unlock(&lo->lo_mutex);
if (partscan)
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 0d619df03fa9..66ce6b81c7d9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3717,7 +3717,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (rv) {
dev_warn(&pdev->dev, "64-bit DMA enable failed\n");
- goto setmask_err;
+ goto iomap_err;
}
/* Copy the info we may need later into the private data structure. */
@@ -3733,7 +3733,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
if (!dd->isr_workq) {
dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
rv = -ENOMEM;
- goto setmask_err;
+ goto iomap_err;
}
memset(cpu_list, 0, sizeof(cpu_list));
@@ -3830,8 +3830,6 @@ msi_initialize_err:
drop_cpu(dd->work[1].cpu_binding);
drop_cpu(dd->work[2].cpu_binding);
}
-setmask_err:
- pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
iomap_err:
kfree(dd);
@@ -3907,7 +3905,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
pci_disable_msi(pdev);
- pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
pci_set_drvdata(pdev, NULL);
put_disk(dd->disk);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7bdc7eb808ea..2592bd19ebc1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2198,9 +2198,7 @@ again:
goto out;
}
}
- ret = nbd_start_device(nbd);
- if (ret)
- goto out;
+
if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
GFP_KERNEL);
@@ -2216,6 +2214,8 @@ again:
goto out;
}
set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
+
+ ret = nbd_start_device(nbd);
out:
mutex_unlock(&nbd->config_lock);
if (!ret) {
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ee6cade70222..01f7aef3fcfb 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -555,7 +555,7 @@ static void act(struct floppy_state *fs)
static void scan_timeout(struct timer_list *t)
{
- struct floppy_state *fs = from_timer(fs, t, timeout);
+ struct floppy_state *fs = timer_container_of(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
unsigned long flags;
@@ -579,7 +579,7 @@ static void scan_timeout(struct timer_list *t)
static void seek_timeout(struct timer_list *t)
{
- struct floppy_state *fs = from_timer(fs, t, timeout);
+ struct floppy_state *fs = timer_container_of(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
unsigned long flags;
@@ -598,7 +598,7 @@ static void seek_timeout(struct timer_list *t)
static void settle_timeout(struct timer_list *t)
{
- struct floppy_state *fs = from_timer(fs, t, timeout);
+ struct floppy_state *fs = timer_container_of(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
unsigned long flags;
@@ -627,7 +627,7 @@ static void settle_timeout(struct timer_list *t)
static void xfer_timeout(struct timer_list *t)
{
- struct floppy_state *fs = from_timer(fs, t, timeout);
+ struct floppy_state *fs = timer_container_of(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
struct dbdma_regs __iomem *dr = fs->dma;
unsigned long flags;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 6f51072776f1..9fd284fa76dc 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -69,7 +69,8 @@
| UBLK_F_USER_RECOVERY_FAIL_IO \
| UBLK_F_UPDATE_SIZE \
| UBLK_F_AUTO_BUF_REG \
- | UBLK_F_QUIESCE)
+ | UBLK_F_QUIESCE \
+ | UBLK_F_PER_IO_DAEMON)
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
@@ -166,6 +167,8 @@ struct ublk_io {
/* valid if UBLK_IO_FLAG_OWNED_BY_SRV is set */
struct request *req;
};
+
+ struct task_struct *task;
};
struct ublk_queue {
@@ -173,11 +176,9 @@ struct ublk_queue {
int q_depth;
unsigned long flags;
- struct task_struct *ubq_daemon;
struct ublksrv_io_desc *io_cmd_buf;
bool force_abort;
- bool timeout;
bool canceling;
bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
unsigned short nr_io_ready; /* how many ios setup */
@@ -1099,11 +1100,6 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu);
}
-static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
-{
- return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING;
-}
-
/* todo: handle partial completion */
static inline void __ublk_complete_rq(struct request *req)
{
@@ -1152,8 +1148,8 @@ exit:
blk_mq_end_request(req, res);
}
-static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
- int res, unsigned issue_flags)
+static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io,
+ struct request *req)
{
/* read cmd first because req will overwrite it */
struct io_uring_cmd *cmd = io->cmd;
@@ -1168,6 +1164,13 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
io->req = req;
+ return cmd;
+}
+
+static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
+ int res, unsigned issue_flags)
+{
+ struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req);
/* tell ublksrv one io request is coming */
io_uring_cmd_done(cmd, res, 0, issue_flags);
@@ -1275,13 +1278,13 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
/*
* Task is exiting if either:
*
- * (1) current != ubq_daemon.
+ * (1) current != io->task.
* io_uring_cmd_complete_in_task() tries to run task_work
- * in a workqueue if ubq_daemon(cmd's task) is PF_EXITING.
+ * in a workqueue if cmd's task is PF_EXITING.
*
* (2) current->flags & PF_EXITING.
*/
- if (unlikely(current != ubq->ubq_daemon || current->flags & PF_EXITING)) {
+ if (unlikely(current != io->task || current->flags & PF_EXITING)) {
__ublk_abort_rq(ubq, req);
return;
}
@@ -1330,24 +1333,22 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
{
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct request *rq = pdu->req_list;
- struct ublk_queue *ubq = pdu->ubq;
struct request *next;
do {
next = rq->rq_next;
rq->rq_next = NULL;
- ublk_dispatch_req(ubq, rq, issue_flags);
+ ublk_dispatch_req(rq->mq_hctx->driver_data, rq, issue_flags);
rq = next;
} while (rq);
}
-static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
+static void ublk_queue_cmd_list(struct ublk_io *io, struct rq_list *l)
{
- struct request *rq = rq_list_peek(l);
- struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd;
+ struct io_uring_cmd *cmd = io->cmd;
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
- pdu->req_list = rq;
+ pdu->req_list = rq_list_peek(l);
rq_list_init(l);
io_uring_cmd_complete_in_task(cmd, ublk_cmd_list_tw_cb);
}
@@ -1355,13 +1356,10 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
{
struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+ struct ublk_io *io = &ubq->ios[rq->tag];
if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
- if (!ubq->timeout) {
- send_sig(SIGKILL, ubq->ubq_daemon, 0);
- ubq->timeout = true;
- }
-
+ send_sig(SIGKILL, io->task, 0);
return BLK_EH_DONE;
}
@@ -1425,28 +1423,39 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
+static inline bool ublk_belong_to_same_batch(const struct ublk_io *io,
+ const struct ublk_io *io2)
+{
+ return (io_uring_cmd_ctx_handle(io->cmd) ==
+ io_uring_cmd_ctx_handle(io2->cmd)) &&
+ (io->task == io2->task);
+}
+
static void ublk_queue_rqs(struct rq_list *rqlist)
{
struct rq_list requeue_list = { };
struct rq_list submit_list = { };
- struct ublk_queue *ubq = NULL;
+ struct ublk_io *io = NULL;
struct request *req;
while ((req = rq_list_pop(rqlist))) {
struct ublk_queue *this_q = req->mq_hctx->driver_data;
+ struct ublk_io *this_io = &this_q->ios[req->tag];
- if (ubq && ubq != this_q && !rq_list_empty(&submit_list))
- ublk_queue_cmd_list(ubq, &submit_list);
- ubq = this_q;
-
- if (ublk_prep_req(ubq, req, true) == BLK_STS_OK)
- rq_list_add_tail(&submit_list, req);
- else
+ if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) {
rq_list_add_tail(&requeue_list, req);
+ continue;
+ }
+
+ if (io && !ublk_belong_to_same_batch(io, this_io) &&
+ !rq_list_empty(&submit_list))
+ ublk_queue_cmd_list(io, &submit_list);
+ io = this_io;
+ rq_list_add_tail(&submit_list, req);
}
- if (ubq && !rq_list_empty(&submit_list))
- ublk_queue_cmd_list(ubq, &submit_list);
+ if (!rq_list_empty(&submit_list))
+ ublk_queue_cmd_list(io, &submit_list);
*rqlist = requeue_list;
}
@@ -1474,17 +1483,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
/* All old ioucmds have to be completed */
ubq->nr_io_ready = 0;
- /*
- * old daemon is PF_EXITING, put it now
- *
- * It could be NULL in case of closing one quisced device.
- */
- if (ubq->ubq_daemon)
- put_task_struct(ubq->ubq_daemon);
- /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
- ubq->ubq_daemon = NULL;
- ubq->timeout = false;
-
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -1495,6 +1493,17 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
io->flags &= UBLK_IO_FLAG_CANCELED;
io->cmd = NULL;
io->addr = 0;
+
+ /*
+ * old task is PF_EXITING, put it now
+ *
+ * It could be NULL in case of closing one quiesced
+ * device.
+ */
+ if (io->task) {
+ put_task_struct(io->task);
+ io->task = NULL;
+ }
}
}
@@ -1516,7 +1525,7 @@ static void ublk_reset_ch_dev(struct ublk_device *ub)
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_queue_reinit(ub, ublk_get_queue(ub, i));
- /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
+ /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */
ub->mm = NULL;
ub->nr_queues_ready = 0;
ub->nr_privileged_daemon = 0;
@@ -1783,6 +1792,7 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
struct task_struct *task;
+ struct ublk_io *io;
if (WARN_ON_ONCE(!ubq))
return;
@@ -1791,13 +1801,14 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
return;
task = io_uring_cmd_get_task(cmd);
- if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
+ io = &ubq->ios[pdu->tag];
+ if (WARN_ON_ONCE(task && task != io->task))
return;
if (!ubq->canceling)
ublk_start_cancel(ubq);
- WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
+ WARN_ON_ONCE(io->cmd != cmd);
ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
}
@@ -1930,8 +1941,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
{
ubq->nr_io_ready++;
if (ublk_queue_ready(ubq)) {
- ubq->ubq_daemon = current;
- get_task_struct(ubq->ubq_daemon);
ub->nr_queues_ready++;
if (capable(CAP_SYS_ADMIN))
@@ -2084,6 +2093,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
}
ublk_fill_io_cmd(io, cmd, buf_addr);
+ WRITE_ONCE(io->task, get_task_struct(current));
ublk_mark_io_ready(ub, ubq);
out:
mutex_unlock(&ub->mutex);
@@ -2155,10 +2165,9 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
return 0;
}
-static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io)
+static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io,
+ struct request *req)
{
- struct request *req = io->req;
-
/*
* We have handled UBLK_IO_NEED_GET_DATA command,
* so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
@@ -2179,11 +2188,13 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
const struct ublksrv_io_cmd *ub_cmd)
{
struct ublk_device *ub = cmd->file->private_data;
+ struct task_struct *task;
struct ublk_queue *ubq;
struct ublk_io *io;
u32 cmd_op = cmd->cmd_op;
unsigned tag = ub_cmd->tag;
int ret = -EINVAL;
+ struct request *req;
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
@@ -2193,13 +2204,14 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
ubq = ublk_get_queue(ub, ub_cmd->q_id);
- if (ubq->ubq_daemon && ubq->ubq_daemon != current)
- goto out;
if (tag >= ubq->q_depth)
goto out;
io = &ubq->ios[tag];
+ task = READ_ONCE(io->task);
+ if (task && task != current)
+ goto out;
/* there is pending io cmd, something must be wrong */
if (io->flags & UBLK_IO_FLAG_ACTIVE) {
@@ -2241,11 +2253,19 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
break;
case UBLK_IO_NEED_GET_DATA:
- io->addr = ub_cmd->addr;
- if (!ublk_get_data(ubq, io))
- return -EIOCBQUEUED;
-
- return UBLK_IO_RES_OK;
+ /*
+ * ublk_get_data() may fail and fallback to requeue, so keep
+ * uring_cmd active first and prepare for handling new requeued
+ * request
+ */
+ req = io->req;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+ io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
+ if (likely(ublk_get_data(ubq, io, req))) {
+ __ublk_prep_compl_io_cmd(io, req);
+ return UBLK_IO_RES_OK;
+ }
+ break;
default:
goto out;
}
@@ -2449,9 +2469,14 @@ static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
{
int size = ublk_queue_cmd_buf_size(ub, q_id);
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ int i;
+
+ for (i = 0; i < ubq->q_depth; i++) {
+ struct ublk_io *io = &ubq->ios[i];
+ if (io->task)
+ put_task_struct(io->task);
+ }
- if (ubq->ubq_daemon)
- put_task_struct(ubq->ubq_daemon);
if (ubq->io_cmd_buf)
free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
}
@@ -2825,6 +2850,10 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
if (copy_from_user(&info, argp, sizeof(info)))
return -EFAULT;
+ if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth ||
+ info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues)
+ return -EINVAL;
+
if (capable(CAP_SYS_ADMIN))
info.flags &= ~UBLK_F_UNPRIVILEGED_DEV;
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
@@ -2923,7 +2952,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
ub->dev_info.flags &= UBLK_F_ALL;
ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
- UBLK_F_URING_CMD_COMP_IN_TASK;
+ UBLK_F_URING_CMD_COMP_IN_TASK |
+ UBLK_F_PER_IO_DAEMON;
/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
@@ -3188,14 +3218,14 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
- pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
- __func__, ub->dev_info.nr_hw_queues, header->dev_id);
- /* wait until new ubq_daemon sending all FETCH_REQ */
+ pr_devel("%s: Waiting for all FETCH_REQs, dev id %d...\n", __func__,
+ header->dev_id);
+
if (wait_for_completion_interruptible(&ub->completion))
return -EINTR;
- pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n",
- __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+ pr_devel("%s: All FETCH_REQs received, dev id %d\n", __func__,
+ header->dev_id);
mutex_lock(&ub->mutex);
if (ublk_nosrv_should_stop_dev(ub))