summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 10:34:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 10:34:51 -0700
commitc6b1e36c8fa04a6680c44fe0321d0370400e90b6 (patch)
tree5110f0639bfa803baa8d213cb21efe37beeaf742 /drivers
parent81e3e044897b0875a52953b3fb6241a33428e4f9 (diff)
parenta84ebb837b419787c2ece74efa566c998929cead (diff)
Merge branch 'for-4.13/block' of git://git.kernel.dk/linux-block
Pull core block/IO updates from Jens Axboe: "This is the main pull request for the block layer for 4.13. Not a huge round in terms of features, but there's a lot of churn related to some core cleanups. Note this depends on the UUID tree pull request, that Christoph already sent out. This pull request contains: - A series from Christoph, unifying the error/stats codes in the block layer. We now use blk_status_t everywhere, instead of using different schemes for different places. - Also from Christoph, some cleanups around request allocation and IO scheduler interactions in blk-mq. - And yet another series from Christoph, cleaning up how we handle and do bounce buffering in the block layer. - A blk-mq debugfs series from Bart, further improving on the support we have for exporting internal information to aid debugging IO hangs or stalls. - Also from Bart, a series that cleans up the request initialization differences across types of devices. - A series from Goldwyn Rodrigues, allowing the block layer to return failure if we will block and the user asked for non-blocking. - Patch from Hannes for supporting setting loop devices block size to that of the underlying device. - Two series of patches from Javier, fixing various issues with lightnvm, particular around pblk. - A series from me, adding support for write hints. This comes with NVMe support as well, so applications can help guide data placement on flash to improve performance, latencies, and write amplification. - A series from Ming, improving and hardening blk-mq support for stopping/starting and quiescing hardware queues. - Two pull requests for NVMe updates. Nothing major on the feature side, but lots of cleanups and bug fixes. From the usual crew. - A series from Neil Brown, greatly improving the bio rescue set support. Most notably, this kills the bio rescue work queues, if we don't really need them. - Lots of other little bug fixes that are all over the place" * 'for-4.13/block' of git://git.kernel.dk/linux-block: (217 commits) lightnvm: pblk: set line bitmap check under debug lightnvm: pblk: verify that cache read is still valid lightnvm: pblk: add initialization check lightnvm: pblk: remove target using async. I/Os lightnvm: pblk: use vmalloc for GC data buffer lightnvm: pblk: use right metadata buffer for recovery lightnvm: pblk: schedule if data is not ready lightnvm: pblk: remove unused return variable lightnvm: pblk: fix double-free on pblk init lightnvm: pblk: fix bad le64 assignations nvme: Makefile: remove dead build rule blk-mq: map all HWQ also in hyperthreaded system nvmet-rdma: register ib_client to not deadlock in device removal nvme_fc: fix error recovery on link down. nvmet_fc: fix crashes on bad opcodes nvme_fc: Fix crash when nvme controller connection fails. nvme_fc: replace ioabort msleep loop with completion nvme_fc: fix double calls to nvme_cleanup_cmd() nvme-fabrics: verify that a controller returns the correct NQN nvme: simplify nvme_dev_attrs_are_visible ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/DAC960.c2
-rw-r--r--drivers/block/amiflop.c10
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/aoe/aoecmd.c12
-rw-r--r--drivers/block/aoe/aoedev.c2
-rw-r--r--drivers/block/ataflop.c16
-rw-r--r--drivers/block/brd.c1
-rw-r--r--drivers/block/cciss.c4
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c6
-rw-r--r--drivers/block/drbd/drbd_int.h5
-rw-r--r--drivers/block/drbd/drbd_main.c14
-rw-r--r--drivers/block/drbd/drbd_nl.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c6
-rw-r--r--drivers/block/drbd/drbd_req.c8
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_worker.c16
-rw-r--r--drivers/block/floppy.c9
-rw-r--r--drivers/block/loop.c64
-rw-r--r--drivers/block/loop.h1
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c54
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h2
-rw-r--r--drivers/block/nbd.c44
-rw-r--r--drivers/block/null_blk.c125
-rw-r--r--drivers/block/paride/pcd.c9
-rw-r--r--drivers/block/paride/pd.c3
-rw-r--r--drivers/block/paride/pf.c19
-rw-r--r--drivers/block/pktcdvd.c40
-rw-r--r--drivers/block/ps3disk.c11
-rw-r--r--drivers/block/ps3vram.c16
-rw-r--r--drivers/block/rbd.c28
-rw-r--r--drivers/block/rsxx/dev.c17
-rw-r--r--drivers/block/rsxx/dma.c13
-rw-r--r--drivers/block/rsxx/rsxx_priv.h2
-rw-r--r--drivers/block/skd_main.c32
-rw-r--r--drivers/block/sunvdc.c4
-rw-r--r--drivers/block/swim.c8
-rw-r--r--drivers/block/swim3.c29
-rw-r--r--drivers/block/sx8.c20
-rw-r--r--drivers/block/umem.c4
-rw-r--r--drivers/block/virtio_blk.c23
-rw-r--r--drivers/block/xen-blkback/blkback.c19
-rw-r--r--drivers/block/xen-blkfront.c81
-rw-r--r--drivers/block/xsysace.c9
-rw-r--r--drivers/block/z2ram.c4
-rw-r--r--drivers/cdrom/cdrom.c7
-rw-r--r--drivers/cdrom/gdrom.c10
-rw-r--r--drivers/ide/ide-atapi.c12
-rw-r--r--drivers/ide/ide-cd.c11
-rw-r--r--drivers/ide/ide-cd_ioctl.c1
-rw-r--r--drivers/ide/ide-devsets.c1
-rw-r--r--drivers/ide/ide-disk.c1
-rw-r--r--drivers/ide/ide-dma.c2
-rw-r--r--drivers/ide/ide-eh.c16
-rw-r--r--drivers/ide/ide-floppy.c6
-rw-r--r--drivers/ide/ide-io.c10
-rw-r--r--drivers/ide/ide-ioctls.c2
-rw-r--r--drivers/ide/ide-park.c2
-rw-r--r--drivers/ide/ide-pm.c8
-rw-r--r--drivers/ide/ide-probe.c7
-rw-r--r--drivers/ide/ide-tape.c3
-rw-r--r--drivers/ide/ide-taskfile.c7
-rw-r--r--drivers/ide/siimage.c6
-rw-r--r--drivers/lightnvm/core.c13
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-core.c617
-rw-r--r--drivers/lightnvm/pblk-gc.c475
-rw-r--r--drivers/lightnvm/pblk-init.c389
-rw-r--r--drivers/lightnvm/pblk-map.c75
-rw-r--r--drivers/lightnvm/pblk-rb.c106
-rw-r--r--drivers/lightnvm/pblk-read.c93
-rw-r--r--drivers/lightnvm/pblk-recovery.c275
-rw-r--r--drivers/lightnvm/pblk-rl.c90
-rw-r--r--drivers/lightnvm/pblk-sysfs.c94
-rw-r--r--drivers/lightnvm/pblk-write.c353
-rw-r--r--drivers/lightnvm/pblk.h296
-rw-r--r--drivers/lightnvm/rrpc.c10
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/btree.c6
-rw-r--r--drivers/md/bcache/debug.c2
-rw-r--r--drivers/md/bcache/io.c6
-rw-r--r--drivers/md/bcache/journal.c2
-rw-r--r--drivers/md/bcache/movinggc.c10
-rw-r--r--drivers/md/bcache/request.c28
-rw-r--r--drivers/md/bcache/request.h2
-rw-r--r--drivers/md/bcache/super.c14
-rw-r--r--drivers/md/bcache/writeback.c4
-rw-r--r--drivers/md/dm-bio-prison-v1.c4
-rw-r--r--drivers/md/dm-bio-prison-v1.h2
-rw-r--r--drivers/md/dm-bufio.c28
-rw-r--r--drivers/md/dm-cache-target.c36
-rw-r--r--drivers/md/dm-crypt.c41
-rw-r--r--drivers/md/dm-flakey.c13
-rw-r--r--drivers/md/dm-integrity.c30
-rw-r--r--drivers/md/dm-io.c13
-rw-r--r--drivers/md/dm-log-writes.c13
-rw-r--r--drivers/md/dm-mpath.c85
-rw-r--r--drivers/md/dm-raid1.c29
-rw-r--r--drivers/md/dm-rq.c30
-rw-r--r--drivers/md/dm-rq.h2
-rw-r--r--drivers/md/dm-snap.c15
-rw-r--r--drivers/md/dm-stripe.c17
-rw-r--r--drivers/md/dm-target.c2
-rw-r--r--drivers/md/dm-thin.c67
-rw-r--r--drivers/md/dm-verity-target.c16
-rw-r--r--drivers/md/dm-zero.c4
-rw-r--r--drivers/md/dm.c88
-rw-r--r--drivers/md/md.c14
-rw-r--r--drivers/md/multipath.c10
-rw-r--r--drivers/md/raid1.c38
-rw-r--r--drivers/md/raid10.c38
-rw-r--r--drivers/md/raid5-cache.c6
-rw-r--r--drivers/md/raid5-ppl.c4
-rw-r--r--drivers/md/raid5.c24
-rw-r--r--drivers/memstick/core/ms_block.c7
-rw-r--r--drivers/memstick/core/mspro_block.c8
-rw-r--r--drivers/mmc/core/block.c37
-rw-r--r--drivers/mmc/core/queue.c3
-rw-r--r--drivers/mtd/mtd_blkdevs.c31
-rw-r--r--drivers/mtd/ubi/block.c8
-rw-r--r--drivers/nvdimm/blk.c5
-rw-r--r--drivers/nvdimm/btt.c5
-rw-r--r--drivers/nvdimm/pmem.c29
-rw-r--r--drivers/nvme/host/Kconfig12
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c525
-rw-r--r--drivers/nvme/host/fabrics.c69
-rw-r--r--drivers/nvme/host/fabrics.h4
-rw-r--r--drivers/nvme/host/fc.c137
-rw-r--r--drivers/nvme/host/lightnvm.c18
-rw-r--r--drivers/nvme/host/nvme.h42
-rw-r--r--drivers/nvme/host/pci.c647
-rw-r--r--drivers/nvme/host/rdma.c212
-rw-r--r--drivers/nvme/host/scsi.c2460
-rw-r--r--drivers/nvme/target/admin-cmd.c65
-rw-r--r--drivers/nvme/target/configfs.c68
-rw-r--r--drivers/nvme/target/core.c3
-rw-r--r--drivers/nvme/target/discovery.c4
-rw-r--r--drivers/nvme/target/fc.c10
-rw-r--r--drivers/nvme/target/fcloop.c2
-rw-r--r--drivers/nvme/target/io-cmd.c4
-rw-r--r--drivers/nvme/target/loop.c67
-rw-r--r--drivers/nvme/target/nvmet.h1
-rw-r--r--drivers/nvme/target/rdma.c102
-rw-r--r--drivers/s390/block/dasd.c36
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/s390/block/scm_blk.c8
-rw-r--r--drivers/s390/block/scm_blk.h4
-rw-r--r--drivers/s390/block/xpram.c2
-rw-r--r--drivers/s390/cio/eadm_sch.c6
-rw-r--r--drivers/s390/cio/scm.c2
-rw-r--r--drivers/sbus/char/jsflash.c5
-rw-r--r--drivers/scsi/osd/osd_initiator.c29
-rw-r--r--drivers/scsi/osst.c3
-rw-r--r--drivers/scsi/scsi_error.c3
-rw-r--r--drivers/scsi/scsi_lib.c104
-rw-r--r--drivers/scsi/scsi_transport_sas.c10
-rw-r--r--drivers/scsi/sg.c8
-rw-r--r--drivers/scsi/st.c3
-rw-r--r--drivers/target/target_core_iblock.c12
-rw-r--r--drivers/target/target_core_pscsi.c6
161 files changed, 4187 insertions, 5197 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be77227..245a879b036e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
bool SuccessfulIO)
{
struct request *Request = Command->Request;
- int Error = SuccessfulIO ? 0 : -EIO;
+ blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
Command->SegmentCount, Command->DmaDirection);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f673adfe..49908c74bfcb 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
struct amiga_floppy_struct *floppy;
char *data;
unsigned long flags;
- int err;
+ blk_status_t err;
next_req:
rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
next_segment:
/* Here someone could investigate to be more efficient */
- for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+ for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
#ifdef DEBUG
printk("fd: sector %ld + %d requested for %s\n",
blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
#endif
block = blk_rq_pos(rq) + cnt;
if ((int)block > floppy->blocks) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
@@ -1413,7 +1413,7 @@ next_segment:
#endif
if (get_track(drive, track) == -1) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
@@ -1424,7 +1424,7 @@ next_segment:
/* keep the drive spinning while writes are scheduled */
if (!fd_motor_on(drive)) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
/*
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876370bc..6797e6c23c8a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
d->aoemajor, d->aoeminor);
goto err_mempool;
}
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
spin_lock_irqsave(&d->lock, flags);
WARN_ON(!(d->flags & DEVFL_GD_NOW));
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c09fd5a..dc43254e05a4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
d->ip.rq = NULL;
do {
bio = rq->bio;
- bok = !fastfail && !bio->bi_error;
- } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+ bok = !fastfail && !bio->bi_status;
+ } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
/* cf. http://lkml.org/lkml/2006/10/31/28 */
if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
ahout->cmdstat, ahin->cmdstat,
d->aoemajor, d->aoeminor);
noskb: if (buf)
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
goto out;
}
@@ -1144,7 +1144,7 @@ noskb: if (buf)
"aoe: runt data size in read from",
(long) d->aoemajor, d->aoeminor,
skb->len, n);
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
break;
}
if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb: if (buf)
"aoe: too-large data size in read from",
(long) d->aoemajor, d->aoeminor,
n, f->iter.bi_size);
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
break;
}
bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
if (buf == NULL)
return;
buf->iter.bi_size = 0;
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
if (buf->nframesout == 0)
aoe_end_buf(d, buf);
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947500c6..b28fefb90391 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
if (rq == NULL)
return;
while ((bio = d->ip.nxbio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
d->ip.nxbio = bio->bi_next;
n = (unsigned long) rq->special;
rq->special = (void *) --n;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd52cb5..92da886180aa 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
static DEFINE_TIMER(fd_timer, check_change, 0, 0);
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
{
if (!__blk_end_request_cur(fd_request, err))
fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
fd_request->error_count++;
if (fd_request->error_count >= MAX_ERRORS) {
printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
}
else if (fd_request->error_count == RECALIBRATE_ERRORS) {
printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
}
else {
/* all sectors finished */
- fd_end_request_cur(0);
+ fd_end_request_cur(BLK_STS_OK);
redo_fd_request();
return;
}
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
}
else {
/* all sectors finished */
- fd_end_request_cur(0);
+ fd_end_request_cur(BLK_STS_OK);
redo_fd_request();
}
return;
@@ -1445,7 +1445,7 @@ repeat:
if (!UD.connected) {
/* drive not connected */
printk(KERN_ERR "Unknown Device: fd%d\n", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
@@ -1461,12 +1461,12 @@ repeat:
/* user supplied disk type */
if (--type >= NUM_DISK_MINORS) {
printk(KERN_WARNING "fd%d: invalid disk format", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
if (minor2disktype[type].drive_types > DriveType) {
printk(KERN_WARNING "fd%d: unsupported disk format", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
}
if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f2f66a..6112e99bedf7 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
blk_queue_make_request(brd->brd_queue, brd_make_request);
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
- blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd375503f7b0..02a611993bb4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
/* set the residual count for pc requests */
if (blk_rq_is_passthrough(rq))
scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
- blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+ blk_end_request_all(rq, scsi_req(rq)->result ?
+ BLK_STS_IOERR : BLK_STS_OK);
spin_lock_irqsave(&h->lock, flags);
cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
disk->queue->cmd_size = sizeof(struct scsi_request);
disk->queue->request_fn = do_cciss_request;
disk->queue->queue_lock = &h->lock;
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
if (blk_init_allocated_queue(disk->queue) < 0)
goto cleanup_queue;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa49c12..e02c45cd3c5a 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
else
submit_bio(bio);
wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
- if (!bio->bi_error)
+ if (!bio->bi_status)
err = device->md_io.error;
out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a4107fbc..809fd245c3dc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
!bm_test_page_unchanged(b->bm_pages[idx]))
drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* ctx error will hold the completed-last non-zero error code,
* in case error codes differ. */
- ctx->error = bio->bi_error;
+ ctx->error = blk_status_to_errno(bio->bi_status);
bm_set_page_io_err(b->bm_pages[idx]);
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
if (__ratelimit(&drbd_ratelimit_state))
drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
- bio->bi_error, idx);
+ bio->bi_status, idx);
} else {
bm_clear_page_io_err(b->bm_pages[idx]);
dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45bb03a6..d17b6e6393c7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
__release(local);
if (!bio->bi_bdev) {
drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
- bio->bi_error = -ENODEV;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c365f57..5fb99e06ebe4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
/* I do not use a standard mempool, because:
1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
+ if (drbd_io_bio_set)
+ bioset_free(drbd_io_bio_set);
if (drbd_md_io_bio_set)
bioset_free(drbd_md_io_bio_set);
if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
if (drbd_al_ext_cache)
kmem_cache_destroy(drbd_al_ext_cache);
+ drbd_io_bio_set = NULL;
drbd_md_io_bio_set = NULL;
drbd_md_io_page_pool = NULL;
drbd_ee_mempool = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
drbd_pp_pool = NULL;
drbd_md_io_page_pool = NULL;
drbd_md_io_bio_set = NULL;
+ drbd_io_bio_set = NULL;
/* caches */
drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
goto Enomem;
/* mempools */
- drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+ drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+ if (drbd_io_bio_set == NULL)
+ goto Enomem;
+
+ drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER);
if (drbd_md_io_bio_set == NULL)
goto Enomem;
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 02255a0d68b9..ad0fcb43e45c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
static enum drbd_ret_code
check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
{
- static enum drbd_ret_code rv;
+ enum drbd_ret_code rv;
struct drbd_peer_device *peer_device;
int i;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be24f39..c7e95e6380fb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
struct drbd_device *device = octx->device;
struct issue_flush_context *ctx = octx->ctx;
- if (bio->bi_error) {
- ctx->error = bio->bi_error;
- drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+ if (bio->bi_status) {
+ ctx->error = blk_status_to_errno(bio->bi_status);
+ drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
}
kfree(octx);
bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 656624314f0d..f6e865b2d543 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m)
{
- m->bio->bi_error = m->error;
+ m->bio->bi_status = errno_to_blk_status(m->error);
bio_endio(m->bio);
dec_ap_bio(device);
}
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
GFP_NOIO, 0))
- req->private_bio->bi_error = -EIO;
+ req->private_bio->bi_status = BLK_STS_IOERR;
bio_endio(req->private_bio);
}
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
/* only pass the error to the upper layers.
* if user cannot handle io errors, that's not our business. */
drbd_err(device, "could not kmalloc() req\n");
- bio->bi_error = -ENOMEM;
+ bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return ERR_PTR(-ENOMEM);
}
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
struct drbd_device *device = (struct drbd_device *) q->queuedata;
unsigned long start_jif;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
start_jif = jiffies;
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f2da91..9e1866ab238f 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
struct bio *bio;
- bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+ bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
req->private_bio = bio;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e02d8d..1d8726a8df34 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
struct drbd_device *device;
device = bio->bi_private;
- device->md_io.error = bio->bi_error;
+ device->md_io.error = blk_status_to_errno(bio->bi_status);
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
bio_op(bio) == REQ_OP_DISCARD;
- if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+ if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "%s: error=%d s=%llus\n",
is_write ? (is_discard ? "discard" : "write")
- : "read", bio->bi_error,
+ : "read", bio->bi_status,
(unsigned long long)peer_req->i.sector);
- if (bio->bi_error)
+ if (bio->bi_status)
set_bit(__EE_WAS_ERROR, &peer_req->flags);
bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
if (__ratelimit(&drbd_ratelimit_state))
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
- if (!bio->bi_error)
+ if (!bio->bi_status)
drbd_panic_after_delayed_completion_of_aborted_request(device);
}
/* to avoid recursion in __req_mod */
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
switch (bio_op(bio)) {
case REQ_OP_WRITE_ZEROES:
case REQ_OP_DISCARD:
- if (bio->bi_error == -EOPNOTSUPP)
+ if (bio->bi_status == BLK_STS_NOTSUPP)
what = DISCARD_COMPLETED_NOTSUPP;
else
what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
}
bio_put(req->private_bio);
- req->private_bio = ERR_PTR(bio->bi_error);
+ req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
/* not req_mod(), we need irqsave here! */
spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c7653178..ce823647a9c4 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
* =============================
*/
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
{
unsigned int nr_sectors = current_count_sectors;
unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
DRWE->last_error_generation = DRS->generation;
}
spin_lock_irqsave(q->queue_lock, flags);
- floppy_end_request(req, -EIO);
+ floppy_end_request(req, BLK_STS_IOERR);
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
int drive = cbdata->drive;
- if (bio->bi_error) {
+ if (bio->bi_status) {
pr_info("floppy: error %d while reading block 0\n",
- bio->bi_error);
+ bio->bi_status);
set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
}
complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
goto out_put_disk;
}
+ blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
blk_queue_max_hw_sectors(disks[drive]->queue, 64);
disks[drive]->major = FLOPPY_MAJOR;
disks[drive]->first_minor = TOMINOR(drive);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ebbd0c3fe0ed..0de11444e317 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+ loff_t logical_blocksize)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ lo->lo_logical_blocksize = logical_blocksize;
+ blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+ blk_queue_logical_block_size(lo->lo_queue,
+ lo->lo_logical_blocksize);
+ }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
zero_fill_bio(bio);
}
- blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+ blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -813,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
+ int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
@@ -832,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+ lo_bits = blksize_bits(lo->lo_logical_blocksize);
+
+ blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -843,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
kthread_stop(lo->worker_task);
}
+static int loop_kthread_worker_fn(void *worker_ptr)
+{
+ current->flags |= PF_LESS_THROTTLE;
+ return kthread_worker_fn(worker_ptr);
+}
+
static int loop_prepare_queue(struct loop_device *lo)
{
kthread_init_worker(&lo->worker);
- lo->worker_task = kthread_run(kthread_worker_fn,
+ lo->worker_task = kthread_run(loop_kthread_worker_fn,
&lo->worker, "loop%d", lo->lo_number);
if (IS_ERR(lo->worker_task))
return -ENOMEM;
@@ -921,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
+ lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -1086,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+ int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1118,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto exit;
+ if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
+ lo->lo_logical_blocksize = 512;
+ lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+ if (LO_INFO_BLOCKSIZE(info) != 512 &&
+ LO_INFO_BLOCKSIZE(info) != 1024 &&
+ LO_INFO_BLOCKSIZE(info) != 2048 &&
+ LO_INFO_BLOCKSIZE(info) != 4096)
+ return -EINVAL;
+ if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
+ return -EINVAL;
+ }
+
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit)
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+ lo->lo_sizelimit != info->lo_sizelimit ||
+ lo->lo_flags != lo_flags ||
+ ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+ lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+ LO_INFO_BLOCKSIZE(info))) {
err = -EFBIG;
goto exit;
}
+ }
loop_config_discard(lo);
@@ -1306,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
return err;
}
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
{
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_logical_blocksize);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1369,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_CAPACITY:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_capacity(lo, bdev);
+ err = loop_set_capacity(lo);
break;
case LOOP_SET_DIRECT_IO:
err = -EPERM;
@@ -1645,7 +1683,7 @@ int loop_unregister_transfer(int number)
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1654,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
if (lo->lo_state != Lo_bound)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
switch (req_op(cmd->rq)) {
case REQ_OP_FLUSH:
@@ -1669,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
kthread_queue_work(&lo->worker, &cmd->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void loop_handle_cmd(struct loop_cmd *cmd)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f97ef8c..2c096b9a17b8 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
+ unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4f5653..61b046f256ca 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
struct smart_attr *attrib);
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
- mtip_complete_command(cmd, -EIO);
+ mtip_complete_command(cmd, BLK_STS_IOERR);
return;
}
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
tag,
fail_reason != NULL ?
fail_reason : "unknown");
- mtip_complete_command(cmd, -ENODATA);
+ mtip_complete_command(cmd, BLK_STS_MEDIUM);
continue;
}
}
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
dev_warn(&port->dd->pdev->dev,
"retiring tag %d\n", tag);
- mtip_complete_command(cmd, -EIO);
+ mtip_complete_command(cmd, BLK_STS_IOERR);
}
}
print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
/* insert request and run queue */
blk_execute_rq(rq->q, NULL, rq, true);
- rv = int_cmd->status;
- if (rv < 0) {
- if (rv == -ERESTARTSYS) { /* interrupted */
- dev_err(&dd->pdev->dev,
- "Internal command [%02X] was interrupted after %u ms\n",
- fis->command,
- jiffies_to_msecs(jiffies - start));
- rv = -EINTR;
- goto exec_ic_exit;
- } else if (rv == 0) /* timeout */
- dev_err(&dd->pdev->dev,
- "Internal command did not complete [%02X] within timeout of %lu ms\n",
- fis->command, timeout);
- else
- dev_err(&dd->pdev->dev,
- "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
- fis->command, rv, timeout);
+ if (int_cmd->status) {
+ dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
+ fis->command, int_cmd->status);
+ rv = -EIO;
if (mtip_check_surprise_removal(dd->pdev) ||
test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
clear_bit(req->tag, dd->port->cmds_to_issue);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
mtip_softirq_done_fn(req);
}
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
int err;
err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
- blk_mq_end_request(rq, err);
+ blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
return 0;
}
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
return false;
}
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
{
struct driver_data *dd = hctx->queue->queuedata;
struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
struct mtip_cmd_sg *command_sg;
if (mtip_commands_active(dd->port))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
/* Populate the SG list */
cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
mtip_issue_non_ncq_command(dd->port, rq->tag);
- return BLK_MQ_RQ_QUEUE_OK;
+ return 0;
}
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
return mtip_issue_reserved_cmd(hctx, rq);
if (unlikely(mtip_check_unal_depth(hctx, rq)))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
blk_mq_start_request(rq);
ret = mtip_submit_request(hctx, rq);
if (likely(!ret))
- return BLK_MQ_RQ_QUEUE_OK;
-
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_OK;
+ return BLK_STS_IOERR;
}
static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
if (reserved) {
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
- cmd->status = -ETIME;
+ cmd->status = BLK_STS_TIMEOUT;
return BLK_EH_HANDLED;
}
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
- cmd->status = -ENODEV;
+ cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(rq);
}
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e0bb78..e8286af50e16 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
int retries; /* The number of retries left for this command. */
int direction; /* Data transfer direction */
- int status;
+ blk_status_t status;
};
/* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f3f191ba8ca4..977ec960dd2f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
int index;
int cookie;
struct completion send_complete;
- int status;
+ blk_status_t status;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_config *config;
if (!refcount_inc_not_zero(&nbd->config_refs)) {
- cmd->status = -EIO;
+ cmd->status = BLK_STS_TIMEOUT;
return BLK_EH_HANDLED;
}
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
"Connection timed out\n");
}
set_bit(NBD_TIMEDOUT, &config->runtime_flags);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
sock_shutdown(nbd);
nbd_config_put(nbd);
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
unsigned long size = blk_rq_bytes(req);
struct bio *bio;
u32 type;
+ u32 nbd_cmd_flags = 0;
u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0;
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
return -EIO;
}
+ if (req->cmd_flags & REQ_FUA)
+ nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+
/* We did a partial send previously, and we at least sent the whole
* request struct, so just go and send the rest of the pages in the
* request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
}
cmd->index = index;
cmd->cookie = nsock->cookie;
- request.type = htonl(type);
+ request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
nsock->pending = req;
nsock->sent = sent;
}
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ send_pages:
*/
nsock->pending = req;
nsock->sent = sent;
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
return cmd;
}
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
*/
if (nbd_disconnected(config) ||
config->num_connections <= 1) {
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
return cmd;
}
return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
if (!blk_mq_request_started(req))
return;
cmd = blk_mq_rq_to_pdu(req);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
}
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
nbd_config_put(nbd);
return -EINVAL;
}
- cmd->status = 0;
+ cmd->status = BLK_STS_OK;
again:
nsock = config->socks[index];
mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ out:
return ret;
}
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
* appropriate.
*/
ret = nbd_handle_cmd(cmd, hctx->queue_num);
- if (ret < 0)
- ret = BLK_MQ_RQ_QUEUE_ERROR;
- if (!ret)
- ret = BLK_MQ_RQ_QUEUE_OK;
complete(&cmd->send_complete);
- return ret;
+ return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
}
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
continue;
}
sk_set_memalloc(sock->sk);
+ sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
atomic_inc(&config->recv_threads);
refcount_inc(&nbd->config_refs);
old = nsock->sock;
@@ -957,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
set_disk_ro(nbd->disk, false);
if (config->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
- if (config->flags & NBD_FLAG_SEND_FLUSH)
- blk_queue_write_cache(nbd->disk->queue, true, false);
+ if (config->flags & NBD_FLAG_SEND_FLUSH) {
+ if (config->flags & NBD_FLAG_SEND_FUA)
+ blk_queue_write_cache(nbd->disk->queue, true, true);
+ else
+ blk_queue_write_cache(nbd->disk->queue, true, false);
+ }
else
blk_queue_write_cache(nbd->disk->queue, false, false);
}
@@ -1071,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
return -ENOMEM;
}
sk_set_memalloc(config->socks[i]->sock->sk);
+ config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
atomic_inc(&config->recv_threads);
refcount_inc(&nbd->config_refs);
INIT_WORK(&args->work, recv_work);
@@ -1305,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
seq_puts(s, "NBD_FLAG_READ_ONLY\n");
if (flags & NBD_FLAG_SEND_FLUSH)
seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+ if (flags & NBD_FLAG_SEND_FUA)
+ seq_puts(s, "NBD_FLAG_SEND_FUA\n");
if (flags & NBD_FLAG_SEND_TRIM)
seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1eeac8e..71f4422eba81 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,8 @@ struct nullb {
struct request_queue *q;
struct gendisk *disk;
struct nvm_dev *ndev;
- struct blk_mq_tag_set tag_set;
+ struct blk_mq_tag_set *tag_set;
+ struct blk_mq_tag_set __tag_set;
struct hrtimer timer;
unsigned int queue_depth;
spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
+static struct blk_mq_tag_set tag_set;
enum {
NULL_IRQ_NONE = 0,
@@ -109,7 +111,7 @@ static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
-static int nr_devices = 2;
+static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
@@ -121,6 +123,10 @@ static bool blocking;
module_param(blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+static bool shared_tags;
+module_param(shared_tags, bool, S_IRUGO);
+MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+
static int irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, 0);
+ blk_mq_end_request(cmd->rq, BLK_STS_OK);
return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, 0);
+ blk_end_request_all(cmd->rq, BLK_STS_OK);
break;
case NULL_Q_BIO:
bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
}
}
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
null_handle_cmd(cmd);
- return BLK_MQ_RQ_QUEUE_OK;
-}
-
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
- BUG_ON(!nullb);
- BUG_ON(!nq);
-
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
-}
-
-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
- unsigned int index)
-{
- struct nullb *nullb = data;
- struct nullb_queue *nq = &nullb->queues[index];
-
- hctx->driver_data = nq;
- null_init_queue(nullb, nq);
- nullb->nr_queues++;
-
- return 0;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
- .init_hctx = null_init_hctx,
.complete = null_softirq_done_fn,
};
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
#ifdef CONFIG_NVM
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
{
struct nvm_rq *rqd = rq->end_io_data;
- rqd->error = error;
+ /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
+ rqd->error = status ? -EIO : 0;
nvm_end_io(rqd);
blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
else
del_gendisk(nullb->disk);
blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
+ if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ blk_mq_free_tag_set(nullb->tag_set);
if (!use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
.release = null_release,
};
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+ BUG_ON(!nullb);
+ BUG_ON(!nq);
+
+ init_waitqueue_head(&nq->wait);
+ nq->queue_depth = nullb->queue_depth;
+}
+
+static void null_init_queues(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+ struct blk_mq_hw_ctx *hctx;
+ struct nullb_queue *nq;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (!hctx->nr_ctx || !hctx->tags)
+ continue;
+ nq = &nullb->queues[i];
+ hctx->driver_data = nq;
+ null_init_queue(nullb, nq);
+ nullb->nr_queues++;
+ }
+}
+
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
return 0;
}
+static int null_init_tag_set(struct blk_mq_tag_set *set)
+{
+ set->ops = &null_mq_ops;
+ set->nr_hw_queues = submit_queues;
+ set->queue_depth = hw_queue_depth;
+ set->numa_node = home_node;
+ set->cmd_size = sizeof(struct nullb_cmd);
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->driver_data = NULL;
+
+ if (blocking)
+ set->flags |= BLK_MQ_F_BLOCKING;
+
+ return blk_mq_alloc_tag_set(set);
+}
+
static int null_add_dev(void)
{
struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
goto out_free_nullb;
if (queue_mode == NULL_Q_MQ) {
- nullb->tag_set.ops = &null_mq_ops;
- nullb->tag_set.nr_hw_queues = submit_queues;
- nullb->tag_set.queue_depth = hw_queue_depth;
- nullb->tag_set.numa_node = home_node;
- nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
- nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
- nullb->tag_set.driver_data = nullb;
-
- if (blocking)
- nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
-
- rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+ if (shared_tags) {
+ nullb->tag_set = &tag_set;
+ rv = 0;
+ } else {
+ nullb->tag_set = &nullb->__tag_set;
+ rv = null_init_tag_set(nullb->tag_set);
+ }
+
if (rv)
goto out_cleanup_queues;
- nullb->q = blk_mq_init_queue(&nullb->tag_set);
+ nullb->q = blk_mq_init_queue(nullb->tag_set);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
goto out_cleanup_tags;
}
+ null_init_queues(nullb);
} else if (queue_mode == NULL_Q_BIO) {
nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
+ if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
queue_mode = NULL_Q_MQ;
}
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ null_init_tag_set(&tag_set);
+
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
}
mutex_unlock(&lock);
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ blk_mq_free_tag_set(&tag_set);
+
kmem_cache_destroy(ppa_cache);
}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef34d5a..7b8c6368beb7 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
put_disk(disk);
continue;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
cd->disk = disk;
cd->pi = &cd->pia;
cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
ps_set_intr(do_pcd_read, NULL, 0, nice);
return;
} else {
- __blk_end_request_all(pcd_req, -EIO);
+ __blk_end_request_all(pcd_req, BLK_STS_IOERR);
pcd_req = NULL;
}
}
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
pcd_request();
}
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
{
unsigned long saved_flags;
@@ -837,7 +838,7 @@ static void pcd_start(void)
if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
pcd_bufblk = -1;
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
return;
}
pcd_bufblk = -1;
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f90978..27a44b97393a 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
phase = NULL;
spin_lock_irqsave(&pd_lock, saved_flags);
if (!__blk_end_request_cur(pd_req,
- res == Ok ? 0 : -EIO)) {
+ res == Ok ? 0 : BLK_STS_IOERR)) {
if (!set_next_request())
stop = 1;
}
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
return;
}
blk_queue_max_hw_sectors(p->queue, cluster);
+ blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
if (disk->drive == -1) {
for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca7315ddc..eef7a91f667d 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
return;
}
blk_queue_max_segments(disk->queue, cluster);
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
pf->disk = disk;
pf->pi = &pf->pia;
pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
return pf_req != NULL;
}
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
{
if (pf_req && !__blk_end_request_cur(pf_req, err))
pf_req = NULL;
@@ -821,7 +822,7 @@ repeat:
pf_count = blk_rq_cur_sectors(pf_req);
if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
- pf_end_request(-EIO);
+ pf_end_request(BLK_STS_IOERR);
goto repeat;
}
@@ -836,7 +837,7 @@ repeat:
pi_do_claimed(pf_current->pi, do_pf_write);
else {
pf_busy = 0;
- pf_end_request(-EIO);
+ pf_end_request(BLK_STS_IOERR);
goto repeat;
}
}
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
return 0;
}
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
{
unsigned long saved_flags;
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
pi_do_claimed(pf_current->pi, do_pf_read_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
pi_do_claimed(pf_current->pi, do_pf_read_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865ebeb9..467beca397a2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
if (IS_ERR(rq))
return PTR_ERR(rq);
- scsi_req_init(rq);
if (cgc->buflen) {
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&pkt->io_errors);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+ pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
pd->stats.pkt_ended++;
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
pkt_queue_bio(pd, pkt->w_bio);
}
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
{
struct bio *bio;
- if (error)
+ if (status)
pkt->cache_valid = 0;
/* Finish all bios corresponding to this packet */
while ((bio = bio_list_pop(&pkt->orig_bios))) {
- bio->bi_error = error;
+ bio->bi_status = status;
bio_endio(bio);
}
}
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
if (atomic_read(&pkt->io_wait) > 0)
return;
- if (!pkt->w_bio->bi_error) {
+ if (!pkt->w_bio->bi_status) {
pkt_set_state(pkt, PACKET_FINISHED_STATE);
} else {
pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
break;
case PACKET_FINISHED_STATE:
- pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+ pkt_finish_packet(pkt, pkt->w_bio->bi_status);
return;
default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
struct packet_stacked_data *psd = bio->bi_private;
struct pktcdvd_device *pd = psd->pd;
- psd->bio->bi_error = bio->bi_error;
+ psd->bio->bi_status = bio->bi_status;
bio_put(bio);
bio_endio(psd->bio);
mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+ struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
char b[BDEVNAME_SIZE];
struct bio *split;
- blk_queue_bounce(q, &bio);
-
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
pd = q->queuedata;
if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
split = bio_split(bio, last_zone -
bio->bi_iter.bi_sector,
- GFP_NOIO, fs_bio_set);
+ GFP_NOIO, pkt_bio_set);
bio_chain(split, bio);
} else {
split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
bdev = bdget(dev);
if (!bdev)
return -ENOMEM;
+ if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+ WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+ bdput(bdev);
+ return -EINVAL;
+ }
ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
if (ret)
return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
sizeof(struct packet_stacked_data));
if (!psd_pool)
return -ENOMEM;
+ pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pkt_bio_set) {
+ mempool_destroy(psd_pool);
+ return -ENOMEM;
+ }
ret = register_blkdev(pktdev_major, DRIVER_NAME);
if (ret < 0) {
@@ -2951,6 +2959,7 @@ out:
unregister_blkdev(pktdev_major, DRIVER_NAME);
out2:
mempool_destroy(psd_pool);
+ bioset_free(pkt_bio_set);
return ret;
}
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
unregister_blkdev(pktdev_major, DRIVER_NAME);
mempool_destroy(psd_pool);
+ bioset_free(pkt_bio_set);
}
MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e9feb8..075662f2cf46 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
if (res) {
dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
__LINE__, op, res);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return 0;
}
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
if (res) {
dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
__func__, __LINE__, res);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return 0;
}
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
break;
default:
blk_dump_rq_flags(req, DEVICE_NAME " bad request");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
}
}
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
struct ps3_storage_device *dev = data;
struct ps3disk_private *priv;
struct request *req;
- int res, read, error;
+ int res, read;
+ blk_status_t error;
u64 tag, status;
const char *op;
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
if (status) {
dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
__LINE__, op, status);
- error = -EIO;
+ error = BLK_STS_IOERR;
} else {
dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
__LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe21559..e0e81cacd781 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
kfree(priv->cache.tags);
}
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
size_t len, size_t *retlen, u_char *buf)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
(unsigned int)from, len);
if (from >= priv->size)
- return -EIO;
+ return BLK_STS_IOERR;
if (len > priv->size - from)
len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
return 0;
}
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
size_t len, size_t *retlen, const u_char *buf)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
unsigned int cached, count;
if (to >= priv->size)
- return -EIO;
+ return BLK_STS_IOERR;
if (len > priv->size - to)
len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
int write = bio_data_dir(bio) == WRITE;
const char *op = write ? "write" : "read";
loff_t offset = bio->bi_iter.bi_sector << 9;
- int error = 0;
+ blk_status_t error = 0;
struct bio_vec bvec;
struct bvec_iter iter;
struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
if (retlen != len) {
dev_err(&dev->core, "Short %s\n", op);
- error = -EIO;
+ error = BLK_STS_IOERR;
goto out;
}
@@ -593,7 +593,7 @@ out:
next = bio_list_peek(&priv->list);
spin_unlock_irq(&priv->lock);
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
return next;
}
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
dev_dbg(&dev->core, "%s\n", __func__);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c16f74547804..b008b6a98098 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -442,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
static struct kmem_cache *rbd_img_request_cache;
static struct kmem_cache *rbd_obj_request_cache;
+static struct bio_set *rbd_bio_clone;
+
static int rbd_major;
static DEFINE_IDA(rbd_dev_id_ida);
@@ -1363,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
{
struct bio *bio;
- bio = bio_clone(bio_src, gfpmask);
+ bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
if (!bio)
return NULL; /* ENOMEM */
@@ -2293,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
rbd_assert(img_request->obj_request != NULL);
more = obj_request->which < img_request->obj_request_count - 1;
} else {
+ blk_status_t status = errno_to_blk_status(result);
+
rbd_assert(img_request->rq != NULL);
- more = blk_update_request(img_request->rq, result, xferred);
+ more = blk_update_request(img_request->rq, status, xferred);
if (!more)
- __blk_mq_end_request(img_request->rq, result);
+ __blk_mq_end_request(img_request->rq, status);
}
return more;
@@ -4150,17 +4154,17 @@ err_rq:
obj_op_name(op_type), length, offset, result);
ceph_put_snap_context(snapc);
err:
- blk_mq_end_request(rq, result);
+ blk_mq_end_request(rq, errno_to_blk_status(result));
}
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
struct work_struct *work = blk_mq_rq_to_pdu(rq);
queue_work(rbd_wq, work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -6414,8 +6418,16 @@ static int rbd_slab_init(void)
if (!rbd_obj_request_cache)
goto out_err;
+ rbd_assert(!rbd_bio_clone);
+ rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!rbd_bio_clone)
+ goto out_err_clone;
+
return 0;
+out_err_clone:
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
out_err:
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;
@@ -6431,6 +6443,10 @@ static void rbd_slab_exit(void)
rbd_assert(rbd_img_request_cache);
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;
+
+ rbd_assert(rbd_bio_clone);
+ bioset_free(rbd_bio_clone);
+ rbd_bio_clone = NULL;
}
static int __init rbd_init(void)
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c566364ac9c..7f4acebf4657 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
{
struct rsxx_cardinfo *card = q->queuedata;
struct rsxx_bio_meta *bio_meta;
- int st = -EINVAL;
+ blk_status_t st = BLK_STS_IOERR;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
might_sleep();
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
if (bio_end_sector(bio) > get_capacity(card->gendisk))
goto req_err;
- if (unlikely(card->halt)) {
- st = -EFAULT;
+ if (unlikely(card->halt))
goto req_err;
- }
- if (unlikely(card->dma_fault)) {
- st = (-EFAULT);
+ if (unlikely(card->dma_fault))
goto req_err;
- }
if (bio->bi_iter.bi_size == 0) {
dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
if (!bio_meta) {
- st = -ENOMEM;
+ st = BLK_STS_RESOURCE;
goto req_err;
}
@@ -205,7 +201,7 @@ queue_err:
kmem_cache_free(bio_meta_pool, bio_meta);
req_err:
if (st)
- bio->bi_error = st;
+ bio->bi_status = st;
bio_endio(bio);
return BLK_QC_T_NONE;
}
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
}
blk_queue_make_request(card->queue, rsxx_make_request);
- blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f87d0..6a1b2177951c 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
mutex_unlock(&ctrl->work_lock);
}
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
struct list_head *q,
unsigned int laddr,
rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
if (!dma)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
dma->cmd = HW_CMD_BLK_DISCARD;
dma->laddr = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
return 0;
}
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
struct list_head *q,
int dir,
unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
if (!dma)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
dma->laddr = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
return 0;
}
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
unsigned int dma_len;
int dma_cnt[RSXX_MAX_TARGETS];
int tgt;
- int st;
+ blk_status_t st;
int i;
addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
for (i = 0; i < card->n_targets; i++)
rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
FREE_DMA);
-
return st;
}
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d0f690..277f27e673a2 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
void rsxx_dma_cleanup(void);
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4dae2a..d0368682bd43 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
struct skd_special_context *skspcl);
static void skd_request_fn(struct request_queue *rq);
static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
if (req == NULL)
break;
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
}
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
struct request *req = NULL;
struct skd_scsi_request *scsi_req;
unsigned long io_flags;
- int error;
u32 lba;
u32 count;
int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
if (!req->bio)
goto skip_sg;
- error = skd_preop_sg_list(skdev, skreq);
-
- if (error != 0) {
+ if (!skd_preop_sg_list(skdev, skreq)) {
/*
* Complete the native request with error.
* Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
*/
pr_debug("%s:%s:%d error Out\n",
skdev->name, __func__, __LINE__);
- skd_end_request(skdev, skreq, error);
+ skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
continue;
}
@@ -805,7 +802,7 @@ skip_sg:
}
static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, int error)
+ struct skd_request_context *skreq, blk_status_t error)
{
if (unlikely(error)) {
struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
__blk_end_request_all(skreq->req, error);
}
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
n_sg = blk_rq_map_sg(skdev->queue, req, sg);
if (n_sg <= 0)
- return -EINVAL;
+ return false;
/*
* Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
*/
n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
if (n_sg <= 0)
- return -EINVAL;
+ return false;
SKD_ASSERT(n_sg <= skdev->sgs_per_request);
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
}
}
- return 0;
+ return true;
}
static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, 0);
+ skd_end_request(skdev, skreq, BLK_STS_OK);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, -EIO);
+ skd_end_request(skdev, skreq, BLK_STS_IOERR);
break;
}
}
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
* native request.
*/
if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, 0);
+ skd_end_request(skdev, skreq, BLK_STS_OK);
else
skd_resolve_req_exception(skdev, skreq);
}
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
SKD_MAX_RETRIES)
blk_requeue_request(skdev->queue, skreq->req);
else
- skd_end_request(skdev, skreq, -EIO);
+ skd_end_request(skdev, skreq, BLK_STS_IOERR);
skreq->req = NULL;
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
rc = -ENOMEM;
goto err_out;
}
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
skdev->queue = q;
disk->queue = q;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab3d50a..6b16ead1da58 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
rqe->req = NULL;
- __blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+ __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
vdc_blk_queue_start(port);
}
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
struct request *req;
while ((req = blk_fetch_request(port->disk->queue)) != NULL)
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6cf375..84434d3ea19b 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
return ret;
}
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
int req_sector, int sectors_nb,
unsigned char *buffer)
{
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
ret = swim_read_sector(fs, side, track, sector,
buffer);
if (try-- == 0)
- return -EIO;
+ return BLK_STS_IOERR;
} while (ret != 512);
buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
req = swim_next_request(swd);
while (req) {
- int err = -EIO;
+ blk_status_t err = BLK_STS_IOERR;
fs = req->rq_disk->private_data;
if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
put_disk(swd->unit[drive].disk);
goto exit_put_disks;
}
+ blk_queue_bounce_limit(swd->unit[drive].disk->queue,
+ BLK_BOUNCE_HIGH);
swd->unit[drive].disk->queue->queuedata = swd;
swd->unit[drive].swd = swd;
}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c9bdba..9f931f8f6b4c 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
unsigned int clearing);
static int floppy_revalidate(struct gendisk *disk);
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
{
struct request *req = fs->cur_req;
int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
if (fs->mdev->media_bay &&
check_media_bay(fs->mdev->media_bay) != MB_FD) {
swim3_dbg("%s", " media bay absent, dropping req\n");
- swim3_end_request(fs, -ENODEV, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
if (blk_rq_pos(req) >= fs->total_secs) {
swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
(long)blk_rq_pos(req), (long)fs->total_secs);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
if (fs->ejected) {
swim3_dbg("%s", " disk ejected\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
fs->write_prot = swim3_readbit(fs, WRITE_PROT);
if (fs->write_prot) {
swim3_dbg("%s", " try to write, disk write protected\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
}
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
if (fs->retries > 5) {
swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
fs->req_cyl, fs->cur_cyl);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
return;
}
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
out_8(&sw->intr_enable, 0);
fs->cur_cyl = -1;
if (fs->retries > 5) {
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
} else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
out_8(&sw->select, RELAX);
out_8(&sw->intr_enable, 0);
swim3_err("%s", "Seek timeout\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
goto unlock;
}
swim3_err("%s", "Seek settle timeout\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
swim3_err("Timeout %sing sector %ld\n",
(rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
(long)blk_rq_pos(fs->cur_req));
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
swim3_err("%s", "Seen sector but cyl=ff?\n");
fs->cur_cyl = -1;
if (fs->retries > 5) {
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
} else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
swim3_err("Error %sing block %ld (err=%x)\n",
rq_data_dir(req) == WRITE? "writ": "read",
(long)blk_rq_pos(req), err);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
}
} else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n",
fs->state, rq_data_dir(req), intr, err);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
put_disk(disk);
return -ENOMEM;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
disk->queue->queuedata = &floppy_states[index];
if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
return 0;
}
-static struct of_device_id swim3_match[] =
+static const struct of_device_id swim3_match[] =
{
{
.name = "swim3",
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072caf56f..08586dc14e85 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
static inline void carm_end_request_queued(struct carm_host *host,
struct carm_request *crq,
- int error)
+ blk_status_t error)
{
struct request *req = crq->rq;
int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
}
static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
- int error)
+ blk_status_t error)
{
carm_end_request_queued(host, crq, error);
if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
sg = &crq->sg[0];
n_elem = blk_rq_map_sg(q, rq, sg);
if (n_elem <= 0) {
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
return; /* request with no s/g entries? */
}
/* map scatterlist to PCI bus addresses */
n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
if (n_elem <= 0) {
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
return; /* request with no s/g entries? */
}
crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
static void carm_handle_array_info(struct carm_host *host,
struct carm_request *crq, u8 *mem,
- int error)
+ blk_status_t error)
{
struct carm_port *port;
u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
static void carm_handle_scan_chan(struct carm_host *host,
struct carm_request *crq, u8 *mem,
- int error)
+ blk_status_t error)
{
u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
}
static void carm_handle_generic(struct carm_host *host,
- struct carm_request *crq, int error,
+ struct carm_request *crq, blk_status_t error,
int cur_state, int next_state)
{
DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
}
static inline void carm_handle_rw(struct carm_host *host,
- struct carm_request *crq, int error)
+ struct carm_request *crq, blk_status_t error)
{
int pci_dir;
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
u32 handle = le32_to_cpu(ret_handle_le);
unsigned int msg_idx;
struct carm_request *crq;
- int error = (status == RMSG_OK) ? 0 : -EIO;
+ blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
u8 *mem;
VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
err_out:
printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
}
static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3be22b..0677d2514665 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
if (control & DMASCR_HARD_ERROR) {
/* error */
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
dev_printk(KERN_WARNING, &card->dev->dev,
"I/O error on sector %d/%d\n",
le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
spin_lock_irq(&card->lock);
*card->biotail = bio;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 553cc4c542b4..0297ad7c1452 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
struct scatterlist sg[];
};
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
switch (vbr->status) {
case VIRTIO_BLK_S_OK:
- return 0;
+ return BLK_STS_OK;
case VIRTIO_BLK_S_UNSUPP:
- return -ENOTTY;
+ return BLK_STS_NOTSUPP;
default:
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
/* Out of mem doesn't actually happen, since we fall back
* to direct descriptors */
if (err == -ENOMEM || err == -ENOSPC)
- return BLK_MQ_RQ_QUEUE_BUSY;
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
if (notify)
virtqueue_notify(vblk->vqs[qid].vq);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
/* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
goto out;
blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
- err = virtblk_result(blk_mq_rq_to_pdu(req));
+ err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
blk_put_request(req);
return err;
@@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
/* We can handle whatever the host told us to handle. */
blk_queue_max_segments(q, vblk->sg_elems-2);
- /* No need to bounce any requests */
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0e824091a12f..fe7cd58c43d0 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1066,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
atomic_set(&blkif->drain, 0);
}
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+ blk_status_t error)
{
/* An error fails the entire request. */
- if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
- (error == -EOPNOTSUPP)) {
+ if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+ error == BLK_STS_NOTSUPP) {
pr_debug("flush diskcache op failed, not supported\n");
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
- } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
- (error == -EOPNOTSUPP)) {
+ } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+ error == BLK_STS_NOTSUPP) {
pr_debug("write barrier op failed, not supported\n");
xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1103,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
*/
static void end_block_io_op(struct bio *bio)
{
- __end_block_io_op(bio->bi_private, bio->bi_error);
+ __end_block_io_op(bio->bi_private, bio->bi_status);
bio_put(bio);
}
@@ -1420,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
for (i = 0; i < nbio; i++)
bio_put(biolist[i]);
atomic_set(&pending_req->pendcnt, 1);
- __end_block_io_op(pending_req, -EINVAL);
+ __end_block_io_op(pending_req, BLK_STS_RESOURCE);
msleep(1); /* back off a bit */
return -EIO;
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 39459631667c..c852ed3c01d5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
unsigned long associated_id;
};
-struct split_bio {
- struct bio *bio;
- atomic_t pending;
-};
-
struct blkif_req {
int error;
};
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
!info->feature_fua));
}
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
flush_requests(rinfo);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_err:
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
out_busy:
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
blk_mq_stop_hw_queue(hctx);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
-
- /* Make sure we don't use bounce buffers. */
- blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
}
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
continue;
}
- blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+ if (bret->status == BLKIF_RSP_OKAY)
+ blkif_req(req)->error = BLK_STS_OK;
+ else
+ blkif_req(req)->error = BLK_STS_IOERR;
+
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- blkif_req(req)->error = -EOPNOTSUPP;
+ blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- blkif_req(req)->error = -EOPNOTSUPP;
+ blkif_req(req)->error = BLK_STS_NOTSUPP;
}
if (unlikely(blkif_req(req)->error)) {
- if (blkif_req(req)->error == -EOPNOTSUPP)
- blkif_req(req)->error = 0;
+ if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+ blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
return 0;
}
-static void split_bio_end(struct bio *bio)
-{
- struct split_bio *split_bio = bio->bi_private;
-
- if (atomic_dec_and_test(&split_bio->pending)) {
- split_bio->bio->bi_phys_segments = 0;
- split_bio->bio->bi_error = bio->bi_error;
- bio_endio(split_bio->bio);
- kfree(split_bio);
- }
- bio_put(bio);
-}
-
static int blkif_recover(struct blkfront_info *info)
{
- unsigned int i, r_index;
+ unsigned int r_index;
struct request *req, *n;
int rc;
- struct bio *bio, *cloned_bio;
- unsigned int segs, offset;
- int pending, size;
- struct split_bio *split_bio;
+ struct bio *bio;
+ unsigned int segs;
blkfront_gather_backend_features(info);
/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
/* Traverse the list of pending bios and re-queue them */
- if (bio_segments(bio) > segs) {
- /*
- * This bio has more segments than what we can
- * handle, we have to split it.
- */
- pending = (bio_segments(bio) + segs - 1) / segs;
- split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
- BUG_ON(split_bio == NULL);
- atomic_set(&split_bio->pending, pending);
- split_bio->bio = bio;
- for (i = 0; i < pending; i++) {
- offset = (i * segs * XEN_PAGE_SIZE) >> 9;
- size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
- (unsigned int)bio_sectors(bio) - offset);
- cloned_bio = bio_clone(bio, GFP_NOIO);
- BUG_ON(cloned_bio == NULL);
- bio_trim(cloned_bio, offset, size);
- cloned_bio->bi_private = split_bio;
- cloned_bio->bi_end_io = split_bio_end;
- submit_bio(cloned_bio);
- }
- /*
- * Now we have to wait for all those smaller bios to
- * end, so we can also end the "parent" bio.
- */
- continue;
- }
- /* We don't need to split this bio */
submit_bio(bio);
}
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
merge_bio.tail = shadow[j].request->biotail;
bio_list_merge(&info->bio_list, &merge_bio);
shadow[j].request->bio = NULL;
- blk_mq_end_request(shadow[j].request, 0);
+ blk_mq_end_request(shadow[j].request, BLK_STS_OK);
}
}
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2147e0..14459d66ef0c 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
if (!blk_rq_is_passthrough(req))
break;
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
return req;
}
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
/* Drop all in-flight and pending requests */
if (ace->req) {
- __blk_end_request_all(ace->req, -EIO);
+ __blk_end_request_all(ace->req, BLK_STS_IOERR);
ace->req = NULL;
}
while ((req = blk_fetch_request(ace->queue)) != NULL)
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
/* Drop back to IDLE state and notify waiters */
ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
}
/* bio finished; is there another one? */
- if (__blk_end_request_cur(ace->req, 0)) {
+ if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
* blk_rq_sectors(ace->req),
* blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
if (ace->queue == NULL)
goto err_blk_initq;
blk_queue_logical_block_size(ace->queue, 512);
+ blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
/*
* Allocate and initialize GD structure
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e52effa..41c95c9b2ab4 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
while (req) {
unsigned long start = blk_rq_pos(req) << 9;
unsigned long len = blk_rq_cur_bytes(req);
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
if (start + len > z2ram_size) {
pr_err(DEVICE_NAME ": bad access: block=%llu, "
"count=%u\n",
(unsigned long long)blk_rq_pos(req),
blk_rq_cur_sectors(req));
- err = -EIO;
+ err = BLK_STS_IOERR;
goto done;
}
while (len) {
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 76c952fd9ab9..e36d160c458f 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
if (!q)
return -ENXIO;
+ if (!blk_queue_scsi_passthrough(q)) {
+ WARN_ONCE(true,
+ "Attempt read CDDA info through a non-SCSI queue\n");
+ return -EINVAL;
+ }
+
cdi->last_sense = 0;
while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
break;
}
req = scsi_req(rq);
- scsi_req_init(rq);
ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
if (ret) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763a948f..6495b03f576c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
*/
static void gdrom_readdisk_dma(struct work_struct *work)
{
- int err, block, block_cnt;
+ int block, block_cnt;
+ blk_status_t err;
struct packet_command *read_command;
struct list_head *elem, *next;
struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
__raw_writeb(1, GDROM_DMA_STATUS_REG);
wait_event_interruptible_timeout(request_queue,
gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
- err = gd.transfer ? -EIO : 0;
+ err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
gd.transfer = 0;
gd.pending = 0;
/* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
break;
case REQ_OP_WRITE:
pr_notice("Read only device - write request ignored\n");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
break;
default:
printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
break;
}
}
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
err = -ENOMEM;
goto probe_fail_requestq;
}
+ blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
err = probe_gdrom_setupqueue();
if (err)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937284e7..14d1e7d9a1d6 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
int error;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->special = (char *)pc;
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
memset(sense, 0, sizeof(*sense));
blk_rq_init(rq->q, sense_rq);
- scsi_req_init(sense_rq);
+ scsi_req_init(req);
err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
GFP_NOIO);
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
ide_requeue_and_plug(drive, failed_rq);
if (ide_queue_sense_rq(drive, pc)) {
blk_start_request(failed_rq);
- ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
}
}
EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
/* No more interrupts */
if ((stat & ATA_DRQ) == 0) {
- int uptodate, error;
+ int uptodate;
+ blk_status_t error;
debug_log("Packet command completed, %d bytes transferred\n",
blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
if (ata_misc_request(rq)) {
scsi_req(rq)->result = 0;
- error = 0;
+ error = BLK_STS_OK;
} else {
if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
scsi_req(rq)->result = -EIO;
}
- error = uptodate ? 0 : -EIO;
+ error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
}
ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3a64c3..81e18f9628d0 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
cdrom_analyze_sense_data(drive, failed);
- if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+ if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
BUG();
} else
cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
rq = blk_get_request(drive->queue,
write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
ide_req(rq)->type = ATA_PRIV_PC;
rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
nr_bytes -= cmd->last_xfer_len;
if (nr_bytes > 0) {
- ide_complete_rq(drive, 0, nr_bytes);
+ ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
return true;
}
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
out_end:
if (blk_rq_is_scsi(rq) && rc == 0) {
scsi_req(rq)->resid_len = 0;
- blk_end_request_all(rq, 0);
+ blk_end_request_all(rq, BLK_STS_OK);
hwif->rq = NULL;
} else {
if (sense && uptodate)
@@ -699,7 +698,7 @@ out_end:
scsi_req(rq)->resid_len += cmd->last_xfer_len;
}
- ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
if (sense && rc == 2)
ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ out_end:
if (nsectors == 0)
nsectors = 1;
- ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+ ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
return ide_stopped;
}
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 55cd736c39c6..9d26c9737e21 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
int ret;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->rq_flags = RQF_QUIET;
blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9b69c32ee560..ef7c8c43a380 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
return setting->set(drive, arg);
rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 5;
scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7c06237f3479..241983da5fc4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
return -EBUSY;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
drive->mult_req = arg;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c81223e56d..54d4d78ca46a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
ide_finish_cmd(drive, cmd, stat);
else
- ide_complete_rq(drive, 0,
+ ide_complete_rq(drive, BLK_STS_OK,
blk_rq_sectors(cmd->rq) << 9);
return ide_stopped;
}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7d158d..47d5f3379748 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
return ide_stopped;
}
scsi_req(rq)->result = err;
- ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
}
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
}
EXPORT_SYMBOL_GPL(ide_error);
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
{
struct request *rq = drive->hwif->rq;
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
if (err <= 0 && scsi_req(rq)->result == 0)
scsi_req(rq)->result = -EIO;
- ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err, blk_rq_bytes(rq));
}
}
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
}
/* done polling */
hwif->polling = 0;
- ide_complete_drive_reset(drive, 0);
+ ide_complete_drive_reset(drive, BLK_STS_OK);
return ide_stopped;
}
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
ide_hwif_t *hwif = drive->hwif;
const struct ide_port_ops *port_ops = hwif->port_ops;
u8 tmp;
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
if (port_ops && port_ops->reset_poll) {
err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
hwif->name, tmp);
drive->failures++;
- err = -EIO;
+ err = BLK_STS_IOERR;
} else {
tmp = ide_read_error(drive);
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
} else {
ide_reset_report_error(hwif, tmp);
drive->failures++;
- err = -EIO;
+ err = BLK_STS_IOERR;
}
}
out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
if (io_ports->ctl_addr == 0) {
spin_unlock_irqrestore(&hwif->lock, flags);
- ide_complete_drive_reset(drive, -ENXIO);
+ ide_complete_drive_reset(drive, BLK_STS_IOERR);
return ide_stopped;
}
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048cd2df..627b1f62a749 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
drive->failed_pc = NULL;
drive->pc_callback(drive, 0);
- ide_complete_rq(drive, -EIO, done);
+ ide_complete_rq(drive, BLK_STS_IOERR, done);
return ide_stopped;
}
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
if (ata_misc_request(rq)) {
scsi_req(rq)->result = 0;
- ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
} else
goto out_end;
@@ -303,7 +303,7 @@ out_end:
drive->failed_pc = NULL;
if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
scsi_req(rq)->result = -EIO;
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
return ide_stopped;
}
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af721f8cb..3a234701d92c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
#include <linux/uaccess.h>
#include <asm/io.h>
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
unsigned int nr_bytes)
{
/*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
}
}
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
{
ide_hwif_t *hwif = drive->hwif;
struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
* if failfast is set on a request, override number of sectors
* and complete the whole request right now
*/
- if (blk_noretry_request(rq) && error <= 0)
+ if (blk_noretry_request(rq) && error)
nr_bytes = blk_rq_sectors(rq) << 9;
rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
scsi_req(rq)->result = -EIO;
}
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
}
static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
printk("%s: DRIVE_CMD (null)\n", drive->name);
#endif
scsi_req(rq)->result = 0;
- ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
}
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 8c0d17297a7a..3661abb16a5f 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
struct request *rq;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
blk_execute_rq(drive->queue, NULL, rq, 0);
err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
int ret = 0;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 1;
scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 94e3107f59b9..1f264d5d3f3f 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
spin_unlock_irq(&hwif->lock);
rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
* timeout has expired, so power management will be reenabled.
*/
rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
- scsi_req_init(rq);
if (IS_ERR(rq))
goto out;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1f40ce..544f02d673ca 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
rq->special = &rqpm;
rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
return ret;
}
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
{
complete(rq->end_io_data);
}
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
if (unlikely(blk_queue_dying(q))) {
rq->rq_flags |= RQF_QUIET;
scsi_req(rq)->result = -ENXIO;
- __blk_end_request_all(rq, 0);
+ __blk_end_request_all(rq, BLK_STS_OK);
spin_unlock_irq(q->queue_lock);
return -ENXIO;
}
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_PM_RESUME;
rq->rq_flags |= RQF_PREEMPT;
rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
drive->hwif->rq = NULL;
- if (blk_end_request(rq, 0, 0))
+ if (blk_end_request(rq, BLK_STS_OK, 0))
BUG();
}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 023562565d11..01b2adfd8226 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
}
}
-static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+static void ide_initialize_rq(struct request *rq)
{
struct ide_request *req = blk_mq_rq_to_pdu(rq);
+ scsi_req_init(&req->sreq);
req->sreq.sense = req->sense;
- return 0;
}
/*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
return 1;
q->request_fn = do_ide_request;
- q->init_rq_fn = ide_init_rq;
+ q->initialize_rq_fn = ide_initialize_rq;
q->cmd_size = sizeof(struct ide_request);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return 1;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f948b76..fd57e8ccc47a 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
drive->failed_pc = NULL;
drive->pc_callback(drive, 0);
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
return ide_stopped;
}
ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
BUG_ON(size < 0 || size % tape->blk_size);
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd[13] = cmd;
rq->rq_disk = tape->disk;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d23c9e..4efe4c6e956c 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
}
if (nr_bytes > 0)
- ide_complete_rq(drive, 0, nr_bytes);
+ ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
}
}
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
ide_driveid_update(drive);
}
- ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
}
/*
@@ -394,7 +394,7 @@ out_end:
if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
ide_finish_cmd(drive, cmd, stat);
else
- ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
return ide_stopped;
out_err:
ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
rq = blk_get_request(drive->queue,
(cmd->tf_flags & IDE_TFLAG_WRITE) ?
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
/*
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849bb476c..57eea5a9047f 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
* yet.
*/
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
{
ide_hwif_t *hwif = drive->hwif;
void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
if ((sata_stat & 0x03) != 0x03) {
printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
hwif->name, sata_stat);
- return -ENXIO;
+ return BLK_STS_IOERR;
}
}
- return 0;
+ return BLK_STS_OK;
}
/**
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa608ad95..ddae430b6eae 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
}
mutex_unlock(&dev->mlock);
- if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
- return -ENOMEM;
+ ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+ if (ret)
+ return ret;
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
if (!dev->ops->submit_io)
return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
nvm_rq_tgt_to_dev(tgt_dev, rqd);
rqd->dev = tgt_dev;
- return dev->ops->submit_io(dev, rqd);
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io(dev, rqd);
+ if (ret)
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+ return ret;
}
EXPORT_SYMBOL(nvm_submit_io);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea88db84..024a8fc93069 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- if (ret == NVM_IO_REQUEUE) {
+ switch (ret) {
+ case NVM_IO_REQUEUE:
io_schedule();
goto retry;
+ case NVM_IO_ERR:
+ pblk_pipeline_stop(pblk);
+ goto out;
}
if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
+ pblk_rl_inserted(&pblk->rl, nr_entries);
+
out:
pblk_write_should_kick(pblk);
return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..11fe0c5b2a9c 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
*/
#include "pblk.h"
-#include <linux/time.h>
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
- pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+ pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
*ppa = rqd->ppa_addr;
pblk_mark_bb(pblk, line, ppa);
}
+
+ atomic_dec(&pblk->inflight_io);
}
/* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
- up(&pblk->erase_sem);
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->r_rq_pool);
+ mempool_free(rqd, pblk->g_rq_pool);
}
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
spin_unlock(&line->lock);
return;
}
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
if (line->state == PBLK_LINESTATE_CLOSED)
move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
__pblk_map_invalidate(pblk, line, paddr);
}
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- __pblk_map_invalidate(pblk, line, paddr);
-
- pblk_rb_sync_init(&pblk->rwb, NULL);
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
- pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
unsigned int nr_secs)
{
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
pool = pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
} else {
- pool = pblk->r_rq_pool;
- rq_size = pblk_r_rq_size;
+ pool = pblk->g_rq_pool;
+ rq_size = pblk_g_rq_size;
}
rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
if (rw == WRITE)
pool = pblk->w_rq_pool;
else
- pool = pblk->r_rq_pool;
+ pool = pblk->g_rq_pool;
mempool_free(rqd, pool);
}
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
complete(waiting);
}
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
{
- struct bio *bio;
- int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio)
- return;
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
- bio->bi_private = &wait;
- bio->bi_end_io = pblk_end_bio_sync;
+ do {
+ if (!atomic_read(&pblk->inflight_io))
+ break;
- ret = pblk_write_to_cache(pblk, bio, 0);
- if (ret == NVM_IO_OK) {
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: flush cache timed out\n");
- }
- } else if (ret != NVM_IO_DONE) {
- pr_err("pblk: tear down bio failed\n");
- }
+ schedule();
+ } while (1);
+}
- if (bio->bi_error)
- pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+ pblk_rb_flush(&pblk->rwb);
+ do {
+ if (!pblk_rb_sync_count(&pblk->rwb))
+ break;
- bio_put(bio);
+ pblk_write_kick(pblk);
+ schedule();
+ } while (1);
}
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
+ int vsc = le32_to_cpu(*line->vsc);
- if (!line->vsc) {
+ lockdep_assert_held(&line->lock);
+
+ if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = &l_mg->gc_full_list;
}
- } else if (line->vsc < lm->mid_thrs) {
+ } else if (vsc < lm->high_thrs) {
if (line->gc_group != PBLK_LINEGC_HIGH) {
line->gc_group = PBLK_LINEGC_HIGH;
move_list = &l_mg->gc_high_list;
}
- } else if (line->vsc < lm->high_thrs) {
+ } else if (vsc < lm->mid_thrs) {
if (line->gc_group != PBLK_LINEGC_MID) {
line->gc_group = PBLK_LINEGC_MID;
move_list = &l_mg->gc_mid_list;
}
- } else if (line->vsc < line->sec_in_line) {
+ } else if (vsc < line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_LOW) {
line->gc_group = PBLK_LINEGC_LOW;
move_list = &l_mg->gc_low_list;
}
- } else if (line->vsc == line->sec_in_line) {
+ } else if (vsc == line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_EMPTY) {
line->gc_group = PBLK_LINEGC_EMPTY;
move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
line->gc_group = PBLK_LINEGC_NONE;
move_list = &l_mg->corrupt_list;
pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, line->vsc,
+ line->id, vsc,
line->sec_in_line,
lm->high_thrs, lm->mid_thrs);
}
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
#endif
}
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+ pblk->sec_per_write = sec_per_write;
+}
+
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
}
}
#endif
+
+ atomic_inc(&pblk->inflight_io);
+
return nvm_submit_io(dev, rqd);
}
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask)
+ int alloc_type, gfp_t gfp_mask)
{
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
void *kaddr = data;
struct page *page;
struct bio *bio;
int i, ret;
- if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+ if (alloc_type == PBLK_KMALLOC_META)
return bio_map_kern(dev->q, kaddr, len, gfp_mask);
bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush)
{
- int max = pblk->max_write_pgs;
+ int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
return secs_to_sync;
}
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
- int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+ u64 addr;
+ int i;
+
+ addr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ line->cur_sec = addr - nr_secs;
+
+ for (i = 0; i < nr_secs; i++, line->cur_sec--)
+ WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
u64 addr;
int i;
+ lockdep_assert_held(&line->lock);
+
/* logic error: ppa out-of-bounds. Prevent generating bad address */
if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
return addr;
}
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+ u64 paddr;
+
+ spin_lock(&line->lock);
+ paddr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ spin_unlock(&line->lock);
+
+ return paddr;
+}
+
/*
* Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
* taking the per LUN semaphore.
*/
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
- u64 paddr, int dir)
+ void *emeta_buf, u64 paddr, int dir)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
+ void *ppa_list, *meta_list;
struct bio *bio;
struct nvm_rq rqd;
- struct ppa_addr *ppa_list;
- dma_addr_t dma_ppa_list;
- void *emeta = line->emeta;
+ dma_addr_t dma_ppa_list, dma_meta_list;
int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec;
+ int left_ppas = lm->emeta_sec[0];
int id = line->id;
int rq_ppas, rq_len;
int cmd_op, bio_op;
- int flags;
int i, j;
int ret;
DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
if (dir == WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- flags = pblk_set_progr_mode(pblk, WRITE);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
} else
return -EINVAL;
- ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
- if (!ppa_list)
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &dma_meta_list);
+ if (!meta_list)
return -ENOMEM;
+ ppa_list = meta_list + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
rq_len = rq_ppas * geo->sec_size;
- bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
bio_set_op_attrs(bio, bio_op, 0);
rqd.bio = bio;
- rqd.opcode = cmd_op;
- rqd.flags = flags;
- rqd.nr_ppas = rq_ppas;
+ rqd.meta_list = meta_list;
rqd.ppa_list = ppa_list;
+ rqd.dma_meta_list = dma_meta_list;
rqd.dma_ppa_list = dma_ppa_list;
+ rqd.opcode = cmd_op;
+ rqd.nr_ppas = rq_ppas;
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
if (dir == WRITE) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+ rqd.flags = pblk_set_progr_mode(pblk, WRITE);
for (i = 0; i < rqd.nr_ppas; ) {
spin_lock(&line->lock);
paddr = __pblk_alloc_page(pblk, line, min);
spin_unlock(&line->lock);
- for (j = 0; j < min; j++, i++, paddr++)
+ for (j = 0; j < min; j++, i++, paddr++) {
+ meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
rqd.ppa_list[i] =
addr_to_gen_ppa(pblk, paddr, id);
+ }
}
} else {
for (i = 0; i < rqd.nr_ppas; ) {
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
int pos = pblk_dev_ppa_to_pos(geo, ppa);
+ int read_type = PBLK_READ_RANDOM;
+
+ if (pblk_io_aligned(pblk, rq_ppas))
+ read_type = PBLK_READ_SEQUENTIAL;
+ rqd.flags = pblk_set_read_mode(pblk, read_type);
while (test_bit(pos, line->blk_bitmap)) {
paddr += min;
@@ -645,9 +673,11 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: emeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
- bio_put(bio);
+ if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
if (rqd.error) {
if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
pblk_log_read_err(pblk, &rqd);
}
- emeta += rq_len;
+ emeta_buf += rq_len;
left_ppas -= rq_ppas;
if (left_ppas)
goto next_rq;
free_rqd_dma:
- nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
flags = pblk_set_progr_mode(pblk, WRITE);
- lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
+ flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
} else
return -EINVAL;
memset(&rqd, 0, sizeof(struct nvm_rq));
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
return -ENOMEM;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- if (dir == WRITE)
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+ if (dir == WRITE) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ meta_list[i].lba = lba_list[paddr] = addr_empty;
+ }
}
/*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: smeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
}
free_ppa_list:
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf)
{
- return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+ return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+ line->emeta_ssec, READ);
}
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
struct nvm_rq rqd;
- int ret;
+ int ret = 0;
DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
- return 0;
+ return ret;
}
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
struct ppa_addr ppa;
- int bit = -1;
+ int ret, bit = -1;
/* Erase only good blocks, one at a time */
do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
spin_unlock(&line->lock);
- if (pblk_blk_erase_sync(pblk, ppa)) {
+ ret = pblk_blk_erase_sync(pblk, ppa);
+ if (ret) {
pr_err("pblk: failed to erase line %d\n", line->id);
- return -ENOMEM;
+ return ret;
}
} while (1);
return 0;
}
+static void pblk_line_setup_metadata(struct pblk_line *line,
+ struct pblk_line_mgmt *l_mg,
+ struct pblk_line_meta *lm)
+{
+ int meta_line;
+
+ lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+ meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+ if (meta_line == PBLK_DATA_LINES) {
+ spin_unlock(&l_mg->free_lock);
+ io_schedule();
+ spin_lock(&l_mg->free_lock);
+ goto retry_meta;
+ }
+
+ set_bit(meta_line, &l_mg->meta_bitmap);
+ line->meta_line = meta_line;
+
+ line->smeta = l_mg->sline_meta[meta_line];
+ line->emeta = l_mg->eline_meta[meta_line];
+
+ memset(line->smeta, 0, lm->smeta_len);
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+ line->emeta->mem = 0;
+ atomic_set(&line->emeta->sync, 0);
+}
+
/* For now lines are always assumed full lines. Thus, smeta former and current
* lun bitmaps are omitted.
*/
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
struct pblk_line *cur)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct line_smeta *smeta = line->smeta;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+ struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
int nr_blk_line;
/* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
}
/* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+ line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
/* Mark LUNs allocated in this line (all for now) */
bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
- smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
- memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
- smeta->header.id = cpu_to_le32(line->id);
- smeta->header.type = cpu_to_le16(line->type);
- smeta->header.version = cpu_to_le16(1);
+ smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+ memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+ smeta_buf->header.id = cpu_to_le32(line->id);
+ smeta_buf->header.type = cpu_to_le16(line->type);
+ smeta_buf->header.version = cpu_to_le16(1);
/* Start metadata */
- smeta->seq_nr = cpu_to_le64(line->seq_nr);
- smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
/* Fill metadata among lines */
if (cur) {
memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta->prev_id = cpu_to_le32(cur->id);
- cur->emeta->next_id = cpu_to_le32(line->id);
+ smeta_buf->prev_id = cpu_to_le32(cur->id);
+ cur->emeta->buf->next_id = cpu_to_le32(line->id);
} else {
- smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
}
/* All smeta must be set at this point */
- smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
- smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+ smeta_buf->header.crc = cpu_to_le32(
+ pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+ smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
/* End metadata */
- memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
- emeta->seq_nr = cpu_to_le64(line->seq_nr);
- emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta->nr_valid_lbas = cpu_to_le64(0);
- emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta->crc = cpu_to_le32(0);
- emeta->prev_id = smeta->prev_id;
+ memcpy(&emeta_buf->header, &smeta_buf->header,
+ sizeof(struct line_header));
+ emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+ emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+ emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ emeta_buf->crc = cpu_to_le32(0);
+ emeta_buf->prev_id = smeta_buf->prev_id;
return 1;
}
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
/* Mark smeta metadata sectors as bad sectors */
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
off = bit * geo->sec_per_pl;
-retry_smeta:
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
- off += geo->sec_per_pl;
- goto retry_smeta;
+ return 1;
}
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
* blocks to make sure that there are enough sectors to store emeta
*/
bit = lm->sec_per_line;
- off = lm->sec_per_line - lm->emeta_sec;
- bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+ off = lm->sec_per_line - lm->emeta_sec[0];
+ bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
while (nr_bb) {
off -= geo->sec_per_pl;
if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
}
}
- line->sec_in_line -= lm->emeta_sec;
+ line->sec_in_line -= lm->emeta_sec[0];
line->emeta_ssec = off;
- line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+ line->nr_valid_lbas = 0;
+ line->left_msecs = line->sec_in_line;
+ *line->vsc = cpu_to_le32(line->sec_in_line);
if (lm->sec_per_line - line->sec_in_line !=
bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
spin_lock(&line->lock);
if (line->state != PBLK_LINESTATE_FREE) {
+ mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ mempool_free(line->map_bitmap, pblk->line_meta_pool);
spin_unlock(&line->lock);
- WARN(1, "pblk: corrupted line state\n");
- return -EINTR;
+ WARN(1, "pblk: corrupted line %d, state %d\n",
+ line->id, line->state);
+ return -EAGAIN;
}
+
line->state = PBLK_LINESTATE_OPEN;
atomic_set(&line->left_eblks, blk_in_line);
atomic_set(&line->left_seblks, blk_in_line);
+
+ line->meta_distance = lm->meta_distance;
spin_unlock(&line->lock);
/* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line = NULL;
- int bit;
+ struct pblk_line *line;
+ int ret, bit;
lockdep_assert_held(&l_mg->free_lock);
-retry_get:
+retry:
if (list_empty(&l_mg->free_list)) {
pr_err("pblk: no free lines\n");
- goto out;
+ return NULL;
}
line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
list_add_tail(&line->list, &l_mg->bad_list);
pr_debug("pblk: line %d is bad\n", line->id);
- goto retry_get;
+ goto retry;
}
- if (pblk_line_prepare(pblk, line)) {
- pr_err("pblk: failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- return NULL;
+ ret = pblk_line_prepare(pblk, line);
+ if (ret) {
+ if (ret == -EAGAIN) {
+ list_add(&line->list, &l_mg->corrupt_list);
+ goto retry;
+ } else {
+ pr_err("pblk: failed to prepare line %d\n", line->id);
+ list_add(&line->list, &l_mg->free_list);
+ l_mg->nr_free_lines++;
+ return NULL;
+ }
}
-out:
return line;
}
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *retry_line;
+retry:
spin_lock(&l_mg->free_lock);
retry_line = pblk_line_get(pblk);
if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock);
- if (pblk_line_erase(pblk, retry_line)) {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+ if (pblk_line_erase(pblk, retry_line))
+ goto retry;
+
return retry_line;
}
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+
+ atomic_set(&rl->rb_space, 0);
+}
+
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
- int meta_line;
int is_next = 0;
spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
line->type = PBLK_LINETYPE_DATA;
l_mg->data_line = line;
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->smeta = l_mg->sline_meta[meta_line].meta;
- line->emeta = l_mg->eline_meta[meta_line].meta;
- line->meta_line = meta_line;
+ pblk_line_setup_metadata(line, l_mg, &pblk->lm);
/* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_set_space_limit(pblk);
+
+ l_mg->data_next = NULL;
+ } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
}
spin_unlock(&l_mg->free_lock);
+ if (pblk_line_erase(pblk, line)) {
+ line = pblk_line_retry(pblk, line);
+ if (!line)
+ return NULL;
+ }
+
pblk_rl_free_lines_dec(&pblk->rl, line);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
- if (pblk_line_erase(pblk, line))
- return NULL;
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, line, NULL)) {
+ if (!pblk_line_init_metadata(pblk, line, NULL)) {
line = pblk_line_retry(pblk, line);
if (!line)
return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
return line;
}
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+ lockdep_assert_held(&pblk->l_mg.free_lock);
+
+ pblk_set_space_limit(pblk);
+ pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ int ret;
+
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state == PBLK_STATE_RECOVERING ||
+ pblk->state == PBLK_STATE_STOPPED) {
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+ pblk->state = PBLK_STATE_RECOVERING;
+ spin_unlock(&l_mg->free_lock);
+
+ pblk_flush_writer(pblk);
+ pblk_wait_for_meta(pblk);
+
+ ret = pblk_recov_pad(pblk);
+ if (ret) {
+ pr_err("pblk: could not close data on teardown(%d)\n", ret);
+ return;
+ }
+
+ flush_workqueue(pblk->bb_wq);
+ pblk_line_close_meta_sync(pblk);
+
+ spin_lock(&l_mg->free_lock);
+ pblk->state = PBLK_STATE_STOPPED;
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
{
- struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *cur, *new;
unsigned int left_seblks;
- int meta_line;
int is_next = 0;
cur = l_mg->data_line;
new = l_mg->data_next;
if (!new)
- return NULL;
+ return;
l_mg->data_line = new;
-retry_line:
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state != PBLK_STATE_RUNNING) {
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+
+ pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+ spin_unlock(&l_mg->free_lock);
+
+retry_erase:
left_seblks = atomic_read(&new->left_seblks);
if (left_seblks) {
/* If line is not fully erased, erase it */
if (atomic_read(&new->left_eblks)) {
if (pblk_line_erase(pblk, new))
- return NULL;
+ return;
} else {
io_schedule();
}
- goto retry_line;
+ goto retry_erase;
}
- spin_lock(&l_mg->free_lock);
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
- }
-
- set_bit(meta_line, &l_mg->meta_bitmap);
- new->smeta = l_mg->sline_meta[meta_line].meta;
- new->emeta = l_mg->eline_meta[meta_line].meta;
- new->meta_line = meta_line;
-
- memset(new->smeta, 0, lm->smeta_len);
- memset(new->emeta, 0, lm->emeta_len);
- spin_unlock(&l_mg->free_lock);
-
- if (is_next)
- pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, new, cur)) {
+ if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
@@ -1292,12 +1410,30 @@ retry_setup:
if (!pblk_line_init_bb(pblk, new, 1)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
- return new;
+ /* Allocate next line for preparation */
+ spin_lock(&l_mg->free_lock);
+ l_mg->data_next = pblk_line_get(pblk);
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_stop_writes(pblk, new);
+ l_mg->data_next = NULL;
+ } else {
+ l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+ l_mg->data_next->type = PBLK_LINETYPE_DATA;
+ is_next = 1;
+ }
+ spin_unlock(&l_mg->free_lock);
+
+ if (is_next)
+ pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
if (line->invalid_bitmap)
mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
line->map_bitmap = NULL;
line->invalid_bitmap = NULL;
line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
struct nvm_rq *rqd;
int err;
- rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
- memset(rqd, 0, pblk_r_rq_size);
+ rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+ memset(rqd, 0, pblk_g_rq_size);
pblk_setup_e_rq(pblk, rqd, ppa);
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
return pblk->l_mg.data_line;
}
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
return pblk->l_mg.data_next;
}
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
return (line->left_msecs == 0);
}
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line *line, *tline;
+ LIST_HEAD(list);
+
+ spin_lock(&l_mg->close_lock);
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return;
+ }
+
+ list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+ spin_unlock(&l_mg->close_lock);
+
+ list_for_each_entry_safe(line, tline, &list, list) {
+ struct pblk_emeta *emeta = line->emeta;
+
+ while (emeta->mem < lm->emeta_len[0]) {
+ int ret;
+
+ ret = pblk_submit_meta_io(pblk, line);
+ if (ret) {
+ pr_err("pblk: sync meta line %d failed (%d)\n",
+ line->id, ret);
+ return;
+ }
+ }
+ }
+
+ pblk_wait_for_meta(pblk);
+ flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+ if (pblk_rl_is_limit(&pblk->rl))
+ pblk_line_close_meta_sync(pblk);
+}
+
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
- line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
- if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
- pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+ struct pblk_line_meta *lm = &pblk->lm;
- WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+ WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
"pblk: corrupt closed line %d\n", line->id);
+#endif
spin_lock(&l_mg->free_lock);
WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
+
+ pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+
+ /* No need for exact vsc value; avoid a big line lock and take aprox. */
+ memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+ memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+ emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+ emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+ spin_lock(&l_mg->close_lock);
+ spin_lock(&line->lock);
+ list_add_tail(&line->list, &l_mg->emeta_list);
+ spin_unlock(&line->lock);
+ spin_unlock(&l_mg->close_lock);
+
+ pblk_line_should_sync_meta(pblk);
}
void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
}
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *))
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq)
{
struct pblk_line_ws *line_ws;
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
line_ws->priv = priv;
INIT_WORK(&line_ws->ws, work);
- queue_work(pblk->kw_wq, &line_ws->ws);
+ queue_work(wq, &line_ws->ws);
}
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
- int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
int ret;
/*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
/* If the LUN has been locked for this same request, do no attempt to
* lock it again
*/
- if (test_and_set_bit(lun_id, lun_bitmap))
+ if (test_and_set_bit(pos, lun_bitmap))
return;
- rlun = &pblk->luns[lun_id];
+ rlun = &pblk->luns[pos];
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
if (ret) {
switch (ret) {
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c6b63c..6090d28f7995 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
- kfree(gc_rq->data);
- kfree(gc_rq->lba_list);
+ vfree(gc_rq->data);
kfree(gc_rq);
}
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
return 1;
}
- list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
- list_move_tail(&gc_rq->list, &w_list);
- gc->w_entries--;
- }
+ list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+ gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
gc_rq->nr_secs, gc_rq->secs_to_gc,
gc_rq->line, PBLK_IOTYPE_GC);
- kref_put(&gc_rq->line->ref, pblk_line_put);
-
list_del(&gc_rq->list);
+ kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
* Responsible for managing all memory related to a gc request. Also in case of
* failure
*/
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
- u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq;
+ struct pblk_line *line = gc_rq->line;
void *data;
unsigned int secs_to_gc;
- int ret = NVM_IO_OK;
+ int ret = 0;
- data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+ data = vmalloc(gc_rq->nr_secs * geo->sec_size);
if (!data) {
- ret = NVM_IO_ERR;
- goto free_lba_list;
+ ret = -ENOMEM;
+ goto out;
}
/* Read from GC victim block */
- if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+ if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
&secs_to_gc, line)) {
- ret = NVM_IO_ERR;
+ ret = -EFAULT;
goto free_data;
}
if (!secs_to_gc)
- goto free_data;
-
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq) {
- ret = NVM_IO_ERR;
- goto free_data;
- }
+ goto free_rq;
- gc_rq->line = line;
gc_rq->data = data;
- gc_rq->lba_list = lba_list;
- gc_rq->nr_secs = nr_secs;
gc_rq->secs_to_gc = secs_to_gc;
- kref_get(&line->ref);
-
retry:
spin_lock(&gc->w_lock);
- if (gc->w_entries > 256) {
+ if (gc->w_entries >= PBLK_GC_W_QD) {
spin_unlock(&gc->w_lock);
- usleep_range(256, 1024);
+ pblk_gc_writer_kick(&pblk->gc);
+ usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
pblk_gc_writer_kick(&pblk->gc);
- return NVM_IO_OK;
+ return 0;
+free_rq:
+ kfree(gc_rq);
free_data:
- kfree(data);
-free_lba_list:
- kfree(lba_list);
-
+ vfree(data);
+out:
+ kref_put(&line->ref, pblk_line_put);
return ret;
}
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
static void pblk_gc_line_ws(struct work_struct *work)
{
+ struct pblk_line_ws *line_rq_ws = container_of(work,
+ struct pblk_line_ws, ws);
+ struct pblk *pblk = line_rq_ws->pblk;
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line = line_rq_ws->line;
+ struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+ up(&gc->gc_sem);
+
+ if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+ pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+ line->id, *line->vsc,
+ gc_rq->nr_secs);
+ }
+
+ mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line = line_ws->line;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- __le64 *lba_list = line_ws->priv;
- u64 *gc_list;
- int sec_left;
- int nr_ppas, bit;
- int put_line = 1;
+ struct pblk_gc *gc = &pblk->gc;
+ struct line_emeta *emeta_buf;
+ struct pblk_line_ws *line_rq_ws;
+ struct pblk_gc_rq *gc_rq;
+ __le64 *lba_list;
+ int sec_left, nr_secs, bit;
+ int ret;
- pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+ emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+ GFP_KERNEL);
+ if (!emeta_buf) {
+ pr_err("pblk: cannot use GC emeta\n");
+ return;
+ }
- spin_lock(&line->lock);
- sec_left = line->vsc;
- if (!sec_left) {
- /* Lines are erased before being used (l_mg->data_/log_next) */
- spin_unlock(&line->lock);
- goto out;
+ ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+ if (ret) {
+ pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+ goto fail_free_emeta;
+ }
+
+ /* If this read fails, it means that emeta is corrupted. For now, leave
+ * the line untouched. TODO: Implement a recovery routine that scans and
+ * moves all sectors on the line.
+ */
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+ if (!lba_list) {
+ pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+ goto fail_free_emeta;
}
- spin_unlock(&line->lock);
+ sec_left = pblk_line_vsc(line);
if (sec_left < 0) {
pr_err("pblk: corrupted GC line (%d)\n", line->id);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
+ goto fail_free_emeta;
}
bit = -1;
next_rq:
- gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
- if (!gc_list) {
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+ if (!gc_rq)
+ goto fail_free_emeta;
- nr_ppas = 0;
+ nr_secs = 0;
do {
bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
bit + 1);
if (bit > line->emeta_ssec)
break;
- gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
- } while (nr_ppas < pblk->max_write_pgs);
+ gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+ } while (nr_secs < pblk->max_write_pgs);
- if (unlikely(!nr_ppas)) {
- kfree(gc_list);
+ if (unlikely(!nr_secs)) {
+ kfree(gc_rq);
goto out;
}
- if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
- pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
- line->id, line->vsc,
- nr_ppas, nr_ppas);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq->nr_secs = nr_secs;
+ gc_rq->line = line;
+
+ line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_rq_ws)
+ goto fail_free_gc_rq;
- sec_left -= nr_ppas;
+ line_rq_ws->pblk = pblk;
+ line_rq_ws->line = line;
+ line_rq_ws->priv = gc_rq;
+
+ down(&gc->gc_sem);
+ kref_get(&line->ref);
+
+ INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+ queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+ sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
mempool_free(line_ws, pblk->line_ws_pool);
- atomic_dec(&pblk->gc.inflight_gc);
- if (put_line)
- kref_put(&line->ref, pblk_line_put);
+
+ kref_put(&line->ref, pblk_line_put);
+ atomic_dec(&gc->inflight_gc);
+
+ return;
+
+fail_free_gc_rq:
+ kfree(gc_rq);
+fail_free_emeta:
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ pblk_put_line_back(pblk, line);
+ kref_put(&line->ref, pblk_line_put);
+ mempool_free(line_ws, pblk->line_ws_pool);
+ atomic_dec(&gc->inflight_gc);
+
+ pr_err("pblk: Failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
- __le64 *lba_list;
- int ret;
- line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
- line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
- GFP_KERNEL);
- if (!line->emeta) {
- pr_err("pblk: cannot use GC emeta\n");
- goto fail_free_ws;
- }
-
- ret = pblk_line_read_emeta(pblk, line);
- if (ret) {
- pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
- goto fail_free_emeta;
- }
+ pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
- /* If this read fails, it means that emeta is corrupted. For now, leave
- * the line untouched. TODO: Implement a recovery routine that scans and
- * moves all sectors on the line.
- */
- lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
- if (!lba_list) {
- pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
- goto fail_free_emeta;
- }
+ line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_ws)
+ return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
- line_ws->priv = lba_list;
- INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
- queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+ INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+ queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
+}
-fail_free_emeta:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
- mempool_free(line_ws, pblk->line_ws_pool);
- pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line;
+
+ spin_lock(&gc->r_lock);
+ if (list_empty(&gc->r_list)) {
+ spin_unlock(&gc->r_lock);
+ return 1;
+ }
+
+ line = list_first_entry(&gc->r_list, struct pblk_line, list);
+ list_del(&line->list);
+ spin_unlock(&gc->r_lock);
+
+ pblk_gc_kick(pblk);
- return 1;
+ if (pblk_gc_line(pblk, line))
+ pr_err("pblk: failed to GC line %d\n", line->id);
+
+ return 0;
}
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
- struct pblk_line *line, *tline;
+ wake_up_process(gc->gc_reader_ts);
+}
- list_for_each_entry_safe(line, tline, gc_list, list) {
- if (pblk_gc_line(pblk, line))
- pr_err("pblk: failed to GC line %d\n", line->id);
- list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+ struct list_head *group_list)
+{
+ struct pblk_line *line, *victim;
+ int line_vsc, victim_vsc;
+
+ victim = list_first_entry(group_list, struct pblk_line, list);
+ list_for_each_entry(line, group_list, list) {
+ line_vsc = le32_to_cpu(*line->vsc);
+ victim_vsc = le32_to_cpu(*victim->vsc);
+ if (line_vsc < victim_vsc)
+ victim = line;
}
+
+ return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+ unsigned int nr_blocks_free, nr_blocks_need;
+
+ nr_blocks_need = pblk_rl_high_thrs(rl);
+ nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+ /* This is not critical, no need to take lock here */
+ return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}
/*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line, *tline;
- unsigned int nr_blocks_free, nr_blocks_need;
+ struct pblk_line *line;
struct list_head *group_list;
- int run_gc, gc_group = 0;
- int prev_gc = 0;
- int inflight_gc = atomic_read(&gc->inflight_gc);
- LIST_HEAD(gc_list);
+ bool run_gc;
+ int inflight_gc, gc_group = 0, prev_group = 0;
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(&l_mg->gc_full_list)) {
+ spin_unlock(&l_mg->gc_lock);
+ break;
+ }
+
+ line = list_first_entry(&l_mg->gc_full_list,
+ struct pblk_line, list);
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
spin_unlock(&line->lock);
list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&l_mg->gc_lock);
+ } while (1);
- nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
- nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+ return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
- spin_lock(&l_mg->gc_lock);
- while (run_gc && !list_empty(group_list)) {
- /* No need to queue up more GC lines than we can handle */
- if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(group_list)) {
spin_unlock(&l_mg->gc_lock);
- pblk_gc_lines(pblk, &gc_list);
- return;
+ break;
}
- line = list_first_entry(group_list, struct pblk_line, list);
- nr_blocks_free += atomic_read(&line->blk_in_line);
+ line = pblk_gc_get_victim_line(pblk, group_list);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
- list_move_tail(&line->list, &gc_list);
- atomic_inc(&gc->inflight_gc);
- inflight_gc++;
spin_unlock(&line->lock);
- prev_gc = 1;
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
- }
- spin_unlock(&l_mg->gc_lock);
+ list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
+ spin_lock(&gc->r_lock);
+ list_add_tail(&line->list, &gc->r_list);
+ spin_unlock(&gc->r_lock);
- pblk_gc_lines(pblk, &gc_list);
+ inflight_gc = atomic_inc_return(&gc->inflight_gc);
+ pblk_gc_reader_kick(gc);
- if (!prev_gc && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_NR_GC_LISTS)
+ prev_group = 1;
+
+ /* No need to queue up more GC lines than we can handle */
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+ break;
+ } while (1);
+
+ if (!prev_group && pblk->rl.rb_state > gc_group &&
+ gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
wake_up_process(gc->gc_ts);
pblk_gc_writer_kick(gc);
+ pblk_gc_reader_kick(gc);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
return 0;
}
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
{
- pblk->gc.gc_active = 1;
+ struct pblk *pblk = data;
- pr_debug("pblk: gc start\n");
+ while (!kthread_should_stop()) {
+ if (!pblk_gc_read(pblk))
+ continue;
+ set_current_state(TASK_INTERRUPTIBLE);
+ io_schedule();
+ }
+
+ return 0;
}
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- spin_lock(&gc->lock);
- ret = gc->gc_active;
- spin_unlock(&gc->lock);
-
- return ret;
+ pblk->gc.gc_active = 1;
+ pr_debug("pblk: gc start\n");
}
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
- lockdep_assert_held(&gc->lock);
-
if (gc->gc_enabled && !gc->gc_active)
pblk_gc_start(pblk);
-}
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- __pblk_gc_should_start(pblk);
- spin_unlock(&gc->lock);
+ pblk_gc_kick(pblk);
}
/*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
*/
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
- spin_lock(&pblk->gc.lock);
pblk->gc.gc_active = 0;
- spin_unlock(&pblk->gc.lock);
-
pr_debug("pblk: gc stop\n");
}
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
spin_unlock(&gc->lock);
}
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
struct pblk_gc *gc = &pblk->gc;
- int rsv = 0;
+
+ if (force < 0 || force > 1)
+ return -EINVAL;
spin_lock(&gc->lock);
- if (force) {
- gc->gc_enabled = 1;
- rsv = 64;
- }
- pblk_rl_set_gc_rsc(&pblk->rl, rsv);
gc->gc_forced = force;
- __pblk_gc_should_start(pblk);
+
+ if (force)
+ gc->gc_enabled = 1;
+ else
+ gc->gc_enabled = 0;
spin_unlock(&gc->lock);
+
+ pblk_gc_should_start(pblk);
+
+ return 0;
}
int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
goto fail_free_main_kthread;
}
+ gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+ "pblk-gc-reader-ts");
+ if (IS_ERR(gc->gc_reader_ts)) {
+ pr_err("pblk: could not allocate GC reader kthread\n");
+ ret = PTR_ERR(gc->gc_reader_ts);
+ goto fail_free_writer_kthread;
+ }
+
setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
- gc->gc_jobs_active = 8;
gc->w_entries = 0;
atomic_set(&gc->inflight_gc, 0);
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+ /* Workqueue that reads valid sectors from a line and submit them to the
+ * GC writer to be recycled.
+ */
+ gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+ if (!gc->gc_line_reader_wq) {
+ pr_err("pblk: could not allocate GC line reader workqueue\n");
+ ret = -ENOMEM;
+ goto fail_free_reader_kthread;
+ }
+
+ /* Workqueue that prepare lines for GC */
+ gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pr_err("pblk: could not allocate GC reader workqueue\n");
ret = -ENOMEM;
- goto fail_free_writer_kthread;
+ goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
+ spin_lock_init(&gc->r_lock);
+
+ sema_init(&gc->gc_sem, 128);
+
INIT_LIST_HEAD(&gc->w_list);
+ INIT_LIST_HEAD(&gc->r_list);
return 0;
+fail_free_reader_line_wq:
+ destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+ kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
struct pblk_gc *gc = &pblk->gc;
flush_workqueue(gc->gc_reader_wq);
+ flush_workqueue(gc->gc_line_reader_wq);
del_timer(&gc->gc_timer);
pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
- if (pblk->gc.gc_reader_wq)
- destroy_workqueue(pblk->gc.gc_reader_wq);
+ if (gc->gc_reader_wq)
+ destroy_workqueue(gc->gc_reader_wq);
+
+ if (gc->gc_line_reader_wq)
+ destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
+
+ if (gc->gc_reader_ts)
+ kthread_stop(gc->gc_reader_ts);
}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..1b0f61233c21 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
#include "pblk.h"
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
- *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+ *pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* constraint. Writes can be of arbitrary size.
*/
if (bio_data_dir(bio) == READ) {
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
ret = pblk_submit_read(pblk, bio);
if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* available for user I/O.
*/
if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
return -ENOMEM;
}
- pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+ pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
0, 0, NULL);
- if (!pblk_r_rq_cache) {
+ if (!pblk_g_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_w_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
}
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_line_meta_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int max_write_ppas;
- int mod;
- pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
- max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
- pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
- max_write_ppas : nvm_max_phys_sects(dev);
pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
geo->nr_planes * geo->nr_luns;
- if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
- pr_err("pblk: cannot support device max_phys_sect\n");
- return -EINVAL;
- }
-
- div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
- if (mod) {
- pr_err("pblk: bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
if (pblk_init_global_caches(pblk))
return -ENOMEM;
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->page_pool)
return -ENOMEM;
- pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+ pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
pblk_blk_ws_cache);
if (!pblk->line_ws_pool)
goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->rec_pool)
goto free_blk_ws_pool;
- pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
- if (!pblk->r_rq_pool)
+ pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+ pblk_g_rq_cache);
+ if (!pblk->g_rq_pool)
goto free_rec_pool;
- pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+ pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+ pblk_w_rq_cache);
if (!pblk->w_rq_pool)
- goto free_r_rq_pool;
+ goto free_g_rq_pool;
pblk->line_meta_pool =
- mempool_create_slab_pool(16, pblk_line_meta_cache);
+ mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+ pblk_line_meta_cache);
if (!pblk->line_meta_pool)
goto free_w_rq_pool;
- pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!pblk->kw_wq)
+ pblk->close_wq = alloc_workqueue("pblk-close-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+ if (!pblk->close_wq)
goto free_line_meta_pool;
+ pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!pblk->bb_wq)
+ goto free_close_wq;
+
if (pblk_set_ppaf(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
if (pblk_rwb_init(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
INIT_LIST_HEAD(&pblk->compl_list);
return 0;
-free_kw_wq:
- destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+ destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+ destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
- mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+ mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
static void pblk_core_free(struct pblk *pblk)
{
- if (pblk->kw_wq)
- destroy_workqueue(pblk->kw_wq);
+ if (pblk->close_wq)
+ destroy_workqueue(pblk->close_wq);
+
+ if (pblk->bb_wq)
+ destroy_workqueue(pblk->bb_wq);
mempool_destroy(pblk->page_pool);
mempool_destroy(pblk->line_ws_pool);
mempool_destroy(pblk->rec_pool);
- mempool_destroy(pblk->r_rq_pool);
+ mempool_destroy(pblk->g_rq_pool);
mempool_destroy(pblk->w_rq_pool);
mempool_destroy(pblk->line_meta_pool);
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
kmem_cache_destroy(pblk_line_meta_cache);
}
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
kfree(pblk->luns);
}
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+ kfree(line->blk_bitmap);
+ kfree(line->erase_bitmap);
+}
+
static void pblk_lines_free(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
line = &pblk->lines[i];
pblk_line_free(pblk, line);
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
+ pblk_free_line_bitmaps(line);
}
spin_unlock(&l_mg->free_lock);
}
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
kfree(l_mg->bb_template);
kfree(l_mg->bb_aux);
+ kfree(l_mg->vsc_list);
+ spin_lock(&l_mg->free_lock);
for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+ kfree(l_mg->sline_meta[i]);
+ pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+ kfree(l_mg->eline_meta[i]);
}
+ spin_unlock(&l_mg->free_lock);
kfree(pblk->lines);
}
@@ -411,13 +417,31 @@ out:
return ret;
}
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+ int blk_per_line)
{
- struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
int bb_cnt = 0;
int i;
+ for (i = 0; i < blk_per_line; i++) {
+ rlun = &pblk->luns[i];
+ if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+ continue;
+
+ set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+ bb_cnt++;
+ }
+
+ return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
if (!line->blk_bitmap)
return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
return -ENOMEM;
}
- for (i = 0; i < lm->blk_per_line; i++) {
- rlun = &pblk->luns[i];
- if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
- continue;
-
- set_bit(i, line->blk_bitmap);
- bb_cnt++;
- }
-
- return bb_cnt;
+ return 0;
}
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
}
/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
{
- return (sizeof(struct line_emeta) +
- ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
- (pblk->l_mg.nr_lines * sizeof(u32)) +
- lm->blk_bitmap_len);
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ /* Round to sector size so that lba_list starts on its own sector */
+ lm->emeta_sec[1] = DIV_ROUND_UP(
+ sizeof(struct line_emeta) + lm->blk_bitmap_len,
+ geo->sec_size);
+ lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+ /* Round to sector size so that vsc_list starts on its own sector */
+ lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+ lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+ geo->sec_size);
+ lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+ lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+ geo->sec_size);
+ lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+ lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+ return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i;
+
+ /* smeta is always small enough to fit on a kmalloc memory allocation,
+ * emeta depends on the number of LUNs allocated to the pblk instance
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+ if (!l_mg->sline_meta[i])
+ goto fail_free_smeta;
+ }
+
+ /* emeta allocates three different buffers for managing metadata with
+ * in-memory and in-media layouts
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ struct pblk_emeta *emeta;
+
+ emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+ if (!emeta)
+ goto fail_free_emeta;
+
+ if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+ l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+ emeta->buf = vmalloc(lm->emeta_len[0]);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ } else {
+ l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+ emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ }
+ }
+
+ l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+ if (!l_mg->vsc_list)
+ goto fail_free_emeta;
+
+ for (i = 0; i < l_mg->nr_lines; i++)
+ l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+ return 0;
+
+fail_free_emeta:
+ while (--i >= 0) {
+ vfree(l_mg->eline_meta[i]->buf);
+ kfree(l_mg->eline_meta[i]);
+ }
+
+fail_free_smeta:
+ for (i = 0; i < PBLK_DATA_LINES; i++)
+ kfree(l_mg->sline_meta[i]);
+
+ return -ENOMEM;
+}
+
static int pblk_lines_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *line;
unsigned int smeta_len, emeta_len;
- long nr_bad_blks, nr_meta_blks, nr_free_blks;
- int bb_distance;
- int i;
- int ret;
+ long nr_bad_blks, nr_free_blks;
+ int bb_distance, max_write_ppas, mod;
+ int i, ret;
+
+ pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+ max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+ pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+ max_write_ppas : nvm_max_phys_sects(dev);
+ pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+ if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+ pr_err("pblk: cannot support device max_phys_sect\n");
+ return -EINVAL;
+ }
+
+ div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+ if (mod) {
+ pr_err("pblk: bad configuration of sectors/pages\n");
+ return -EINVAL;
+ }
+
+ l_mg->nr_lines = geo->blks_per_lun;
+ l_mg->log_line = l_mg->data_line = NULL;
+ l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+ l_mg->nr_free_lines = 0;
+ bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
lm->high_thrs = lm->sec_per_line / 2;
lm->mid_thrs = lm->sec_per_line / 4;
+ lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
/* Calculate necessary pages for smeta. See comment over struct
* line_smeta definition
*/
- lm->smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
i = 1;
add_smeta_page:
lm->smeta_sec = i * geo->sec_per_pl;
lm->smeta_len = lm->smeta_sec * geo->sec_size;
- smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+ smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
if (smeta_len > lm->smeta_len) {
i++;
goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
*/
i = 1;
add_emeta_page:
- lm->emeta_sec = i * geo->sec_per_pl;
- lm->emeta_len = lm->emeta_sec * geo->sec_size;
+ lm->emeta_sec[0] = i * geo->sec_per_pl;
+ lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
- emeta_len = calc_emeta_len(pblk, lm);
- if (emeta_len > lm->emeta_len) {
+ emeta_len = calc_emeta_len(pblk);
+ if (emeta_len > lm->emeta_len[0]) {
i++;
goto add_emeta_page;
}
- lm->emeta_bb = geo->nr_luns - i;
-
- nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
- (geo->sec_per_blk / 2)) / geo->sec_per_blk;
- lm->min_blk_line = nr_meta_blks + 1;
-
- l_mg->nr_lines = geo->blks_per_lun;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->sline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
+ lm->emeta_bb = geo->nr_luns - i;
+ lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+ geo->sec_per_blk);
+ if (lm->min_blk_line > lm->blk_per_line) {
+ pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+ lm->blk_per_line);
+ ret = -EINVAL;
+ goto fail;
}
- if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
- l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- vfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- } else {
- l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta =
- kmalloc(lm->emeta_len, GFP_KERNEL);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- }
+ ret = pblk_lines_alloc_metadata(pblk);
+ if (ret)
+ goto fail;
l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
INIT_LIST_HEAD(&l_mg->gc_low_list);
INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ INIT_LIST_HEAD(&l_mg->emeta_list);
+
l_mg->gc_lists[0] = &l_mg->gc_high_list;
l_mg->gc_lists[1] = &l_mg->gc_mid_list;
l_mg->gc_lists[2] = &l_mg->gc_low_list;
spin_lock_init(&l_mg->free_lock);
+ spin_lock_init(&l_mg->close_lock);
spin_lock_init(&l_mg->gc_lock);
pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
line->type = PBLK_LINETYPE_FREE;
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
+ line->vsc = &l_mg->vsc_list[i];
spin_lock_init(&line->lock);
- nr_bad_blks = pblk_bb_line(pblk, line);
+ ret = pblk_alloc_line_bitmaps(pblk, line);
+ if (ret)
+ goto fail_free_lines;
+
+ nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+ pblk_free_line_bitmaps(line);
ret = -EINVAL;
goto fail_free_lines;
}
@@ -713,24 +810,20 @@ add_emeta_page:
pblk_set_provision(pblk, nr_free_blks);
- sema_init(&pblk->erase_sem, 1);
-
/* Cleanup per-LUN bad block lists - managed within lines on run-time */
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
return 0;
fail_free_lines:
- kfree(pblk->lines);
+ while (--i >= 0)
+ pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
kfree(l_mg->bb_aux);
fail_free_bb_template:
kfree(l_mg->bb_template);
fail_free_meta:
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
- }
+ pblk_line_meta_free(pblk);
fail:
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
static void pblk_writer_stop(struct pblk *pblk)
{
+ /* The pipeline must be stopped and the write buffer emptied before the
+ * write thread is stopped
+ */
+ WARN(pblk_rb_read_count(&pblk->rwb),
+ "Stopping not fully persisted write buffer\n");
+
+ WARN(pblk_rb_sync_count(&pblk->rwb),
+ "Stopping not fully synced write buffer\n");
+
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
static void pblk_tear_down(struct pblk *pblk)
{
- pblk_flush_writer(pblk);
+ pblk_pipeline_stop(pblk);
pblk_writer_stop(pblk);
pblk_rb_sync_l2p(&pblk->rwb);
- pblk_recov_pad(pblk);
pblk_rwb_free(pblk);
pblk_rl_free(&pblk->rl);
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
pblk->dev = dev;
pblk->disk = tdisk;
+ pblk->state = PBLK_STATE_RUNNING;
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
atomic_long_set(&pblk->req_writes, 0);
atomic_long_set(&pblk->sub_writes, 0);
atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->compl_writes, 0);
atomic_long_set(&pblk->inflight_reads, 0);
+ atomic_long_set(&pblk->cache_reads, 0);
atomic_long_set(&pblk->sync_reads, 0);
atomic_long_set(&pblk->recov_writes, 0);
atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
static int __init pblk_module_init(void)
{
- return nvm_register_tgt_type(&tt_pblk);
+ int ret;
+
+ pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pblk_bio_set)
+ return -ENOMEM;
+ ret = nvm_register_tgt_type(&tt_pblk);
+ if (ret)
+ bioset_free(pblk_bio_set);
+ return ret;
}
static void pblk_module_exit(void)
{
+ bioset_free(pblk_bio_set);
nvm_unregister_tgt_type(&tt_pblk);
}
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c16955284d..fddb924f6dde 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
struct pblk_w_ctx *w_ctx;
- __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
w_ctx->ppa = ppa_list[i];
meta_list[i].lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+ line->nr_valid_lbas++;
} else {
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
- pblk_map_pad_invalidate(pblk, line, paddr);
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ lba_list[paddr] = meta_list[i].lba = addr_empty;
+ __pblk_map_invalidate(pblk, line, paddr);
}
}
if (pblk_line_is_full(line)) {
- line = pblk_line_replace_data(pblk);
- if (!line)
- return;
+ struct pblk_line *prev_line = line;
+
+ pblk_line_replace_data(pblk);
+ pblk_line_close_meta(pblk, prev_line);
}
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+ struct pblk_line_meta *lm = &pblk->lm;
struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
lun_bitmap, &meta_list[i], map_secs);
- erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
- rqd->ppa_list[i].g.ch;
+ erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- if (down_trylock(&pblk->erase_sem))
- continue;
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+ valid_secs, i + min);
+ spin_lock(&e_line->lock);
+ if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
+
*erase_ppa = rqd->ppa_list[i];
erase_ppa->g.blk = e_line->id;
+ spin_unlock(&e_line->lock);
+
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
+ spin_unlock(&e_line->lock);
}
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(ppa_empty(*erase_ppa))) {
- struct pblk_line_meta *lm = &pblk->lm;
+ d_line = pblk_line_get_data(pblk);
+
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return;
- i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
- if (i == lm->blk_per_line)
+ /* Erase blocks that are bad in this line but might not be in next */
+ if (unlikely(ppa_empty(*erase_ppa)) &&
+ bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+ int bit = -1;
+
+retry:
+ bit = find_next_bit(d_line->blk_bitmap,
+ lm->blk_per_line, bit + 1);
+ if (bit >= lm->blk_per_line)
return;
- set_bit(i, e_line->erase_bitmap);
+ spin_lock(&e_line->lock);
+ if (test_bit(bit, e_line->erase_bitmap)) {
+ spin_unlock(&e_line->lock);
+ goto retry;
+ }
+ spin_unlock(&e_line->lock);
+
+ set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+ *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->g.blk = e_line->id;
}
}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384ddc1f9..5ecc154f6831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
/* Release flags on context. Protect from writes and reads */
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
pblk_ppa_set_empty(&w_ctx->ppa);
+ w_ctx->lba = ADDR_EMPTY;
}
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+ unsigned int mem = READ_ONCE(rb->mem);
+ unsigned int sync = READ_ONCE(rb->sync);
+
+ return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
struct pblk_line *line;
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ unsigned int user_io = 0, gc_io = 0;
unsigned int i;
+ int flags;
for (i = 0; i < to_update; i++) {
entry = &rb->entries[*l2p_upd];
w_ctx = &entry->w_ctx;
+ flags = READ_ONCE(entry->w_ctx.flags);
+ if (flags & PBLK_IOTYPE_USER)
+ user_io++;
+ else if (flags & PBLK_IOTYPE_GC)
+ gc_io++;
+ else
+ WARN(1, "pblk: unknown IO type\n");
+
pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
entry->cacheline);
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
}
+ pblk_rl_out(&pblk->rl, user_io, gc_io);
+
return 0;
}
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
/* Protect syncs */
smp_store_release(&rb->sync_point, sync_point);
+ if (!bio)
+ return 0;
+
spin_lock_irq(&rb->s_lock);
bio_list_add(&entry->w_ctx.bios, bio);
spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
return 1;
}
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ unsigned int mem = READ_ONCE(rb->mem);
+
+ if (pblk_rb_sync_point_set(rb, NULL, mem))
+ return;
+
+ pblk_write_should_kick(pblk);
+}
+
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
unsigned int *pos, struct bio *bio,
int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
unsigned int nr_entries, unsigned int *pos)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int flush_done;
+ int io_ret;
spin_lock(&rb->w_lock);
- if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+ io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+ if (io_ret) {
spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
+ return io_ret;
}
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+ if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
spin_unlock(&rb->w_lock);
return NVM_IO_REQUEUE;
}
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
pblk_rl_user_in(&pblk->rl, nr_entries);
spin_unlock(&rb->w_lock);
- return flush_done;
+ return io_ret;
}
/*
@@ -521,20 +557,18 @@ out:
* This function is used by the write thread to form the write bio that will
* persist data on the write buffer to the media.
*/
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ struct request_queue *q = pblk->dev->q;
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct pblk_rb_entry *entry;
struct page *page;
- unsigned int pad = 0, read = 0, to_read = nr_entries;
- unsigned int user_io = 0, gc_io = 0;
+ unsigned int pad = 0, to_read = nr_entries;
unsigned int i;
int flags;
- int ret;
if (count < nr_entries) {
pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
*/
try:
flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA))
+ if (!(flags & PBLK_WRITTEN_DATA)) {
+ io_schedule();
goto try;
-
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
+ }
page = virt_to_page(entry->data);
if (!page) {
@@ -570,17 +599,17 @@ try:
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
- ret = bio_add_page(bio, page, rb->seg_size, 0);
- if (ret != rb->seg_size) {
+ if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+ rb->seg_size) {
pr_err("pblk: could not add page to write bio\n");
flags &= ~PBLK_WRITTEN_DATA;
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
pos = (pos + 1) & (rb->nr_entries - 1);
}
- read = to_read;
- pblk_rl_out(&pblk->rl, user_io, gc_io);
+ if (pad) {
+ if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+ pr_err("pblk: could not pad page in write bio\n");
+ return NVM_IO_ERR;
+ }
+ }
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(pad, &((struct pblk *)
(container_of(rb, struct pblk, rwb)))->padded_writes);
#endif
-out:
- return read;
+
+ return NVM_IO_OK;
}
/*
@@ -623,15 +657,17 @@ out:
* be directed to disk.
*/
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter)
+ struct ppa_addr ppa, int bio_iter)
{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ struct ppa_addr l2p_ppa;
+ u64 pos = pblk_addr_to_cacheline(ppa);
void *data;
int flags;
int ret = 1;
- spin_lock(&rb->w_lock);
#ifdef CONFIG_NVM_DEBUG
/* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
w_ctx = &entry->w_ctx;
flags = READ_ONCE(w_ctx->flags);
+ spin_lock(&rb->w_lock);
+ spin_lock(&pblk->trans_lock);
+ l2p_ppa = pblk_trans_map_get(pblk, lba);
+ spin_unlock(&pblk->trans_lock);
+
/* Check if the entry has been overwritten or is scheduled to be */
- if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+ if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+ flags & PBLK_WRITABLE_ENTRY) {
ret = 0;
goto out;
}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..4e5c48f3de62 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
- pblk_addr_to_cacheline(ppa), bio_iter);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
}
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
}
WARN_ON(test_and_set_bit(i, read_bitmap));
advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
/* Read from media non-cached sectors */
rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+ if (pblk_io_aligned(pblk, nr_secs))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
{
int err;
- rqd->flags = pblk_set_read_mode(pblk);
-
err = pblk_submit_io(pblk, rqd);
if (err)
return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = rqd->bio;
if (rqd->error)
pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+ WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
#endif
- if (rqd->nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(bio);
- if (r_ctx->orig_bio) {
+ if (r_ctx->private) {
+ struct bio *orig_bio = r_ctx->private;
+
#ifdef CONFIG_NVM_DEBUG
- WARN_ONCE(r_ctx->orig_bio->bi_error,
- "pblk: corrupted read bio\n");
+ WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
#endif
- bio_endio(r_ctx->orig_bio);
- bio_put(r_ctx->orig_bio);
+ bio_endio(orig_bio);
+ bio_put(orig_bio);
}
#ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
#endif
pblk_free_rqd(pblk, rqd, READ);
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd->end_io = NULL;
if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
goto retry;
}
WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
rqd->ppa_addr = ppa;
}
+
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
}
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
*/
bio_init_idx = pblk_get_bi_idx(bio);
+ rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd->dma_meta_list);
+ if (!rqd->meta_list) {
+ pr_err("pblk: not able to allocate ppa list\n");
+ goto fail_rqd_free;
+ }
+
if (nr_secs > 1) {
- rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("pblk: not able to allocate ppa list\n");
- goto fail_rqd_free;
- }
+ rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+ rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
} else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
bio_get(bio);
if (bitmap_full(&read_bitmap, nr_secs)) {
bio_endio(bio);
+ atomic_inc(&pblk->inflight_io);
pblk_end_io_read(rqd);
return NVM_IO_OK;
}
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
/* All sectors are to be read from the device */
if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
struct bio *int_bio = NULL;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
/* Clone read bio to deal with read errors internally */
- int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
if (!int_bio) {
pr_err("pblk: could not clone read bio\n");
return NVM_IO_ERR;
}
rqd->bio = int_bio;
- r_ctx->orig_bio = bio;
+ r_ctx->private = bio;
ret = pblk_submit_read_io(pblk, rqd);
if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
struct bio *bio;
struct nvm_rq rqd;
int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
memset(&rqd, 0, sizeof(struct nvm_rq));
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
+ return NVM_IO_ERR;
+
if (nr_secs > 1) {
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
- return NVM_IO_ERR;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
nr_secs);
- if (*secs_to_gc == 1) {
- struct ppa_addr ppa;
-
- ppa = rqd.ppa_list[0];
- nvm_dev_dma_free(dev->parent, rqd.ppa_list,
- rqd.dma_ppa_list);
- rqd.ppa_addr = ppa;
- }
+ if (*secs_to_gc == 1)
+ rqd.ppa_addr = rqd.ppa_list[0];
} else {
*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
}
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
goto out;
data_len = (*secs_to_gc) * geo->sec_size;
- bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+ PBLK_KMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
rqd.nr_ppas = *secs_to_gc;
+ rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd.bio = bio;
ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: GC read I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
#endif
out:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_OK;
err_free_dma:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_ERR;
}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f85087cd3c..0e48d3e4e143 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
return 0;
}
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
u32 crc;
- crc = pblk_calc_emeta_crc(pblk, emeta);
- if (le32_to_cpu(emeta->crc) != crc)
+ crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+ if (le32_to_cpu(emeta_buf->crc) != crc)
return NULL;
- if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
return NULL;
- return pblk_line_emeta_to_lbas(emeta);
+ return emeta_to_lbas(pblk, emeta_buf);
}
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
__le64 *lba_list;
int data_start;
int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
int i;
- lba_list = pblk_recov_get_lba_list(pblk, emeta);
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
if (!lba_list)
return 1;
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
- nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+ nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+ nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (test_and_set_bit(i, line->invalid_bitmap))
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
else
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
spin_unlock(&line->lock);
continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (nr_valid_lbas != nr_lbas)
pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
- line->id, line->emeta->nr_valid_lbas, nr_lbas);
+ line->id, emeta_buf->nr_valid_lbas, nr_lbas);
line->left_msecs = 0;
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+ return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
nr_bb * geo->sec_per_blk;
}
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
r_ptr_int = r_ptr;
next_read_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -295,7 +300,7 @@ next_read_rq:
pr_err("pblk: L2P recovery read timed out\n");
return -EINTR;
}
-
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
return 0;
}
+static void pblk_recov_complete(struct kref *ref)
+{
+ struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+ complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+ struct pblk_pad_rq *pad_rq = rqd->private;
+ struct pblk *pblk = pad_rq->pblk;
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+ pblk_free_rqd(pblk, rqd, WRITE);
+}
+
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int left_ppas)
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
struct pblk_sec_meta *meta_list;
+ struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
- int left_line_ppas = line->left_msecs;
- int rq_ppas, rq_len;
+ int left_line_ppas, rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
+ spin_lock(&line->lock);
+ left_line_ppas = line->left_msecs;
+ spin_unlock(&line->lock);
+
+ pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+ if (!pad_rq)
+ return -ENOMEM;
+
+ data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ if (!data) {
+ ret = -ENOMEM;
+ goto free_rq;
+ }
+
+ pad_rq->pblk = pblk;
+ init_completion(&pad_rq->wait);
+ kref_init(&pad_rq->ref);
next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
+ if (rq_ppas < pblk->min_write_pgs) {
+ pr_err("pblk: corrupted pad line %d\n", line->id);
+ goto free_rq;
+ }
+
rq_len = rq_ppas * geo->sec_size;
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+ if (!meta_list) {
+ ret = -ENOMEM;
+ goto free_data;
+ }
+
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+ rqd = pblk_alloc_rqd(pblk, WRITE);
+ if (IS_ERR(rqd)) {
+ ret = PTR_ERR(rqd);
+ goto fail_free_meta;
+ }
+ memset(rqd, 0, pblk_w_rq_size);
+
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- memset(rqd, 0, pblk_r_rq_size);
-
rqd->bio = bio;
rqd->opcode = NVM_OP_PWRITE;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
+ rqd->end_io = pblk_end_io_recov;
+ rqd->private = pad_rq;
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+ lba_list[w_ptr] = meta_list[i].lba = addr_empty;
rqd->ppa_list[i] = dev_ppa;
}
}
+ kref_get(&pad_rq->ref);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- return ret;
+ goto free_data;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery write timed out\n");
- }
- reinit_completion(&wait);
+ atomic_dec(&pblk->inflight_io);
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
- if (left_ppas > 0 && left_line_ppas)
+ if (left_ppas && left_line_ppas)
goto next_pad_rq;
- return 0;
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+
+ if (!wait_for_completion_io_timeout(&pad_rq->wait,
+ msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+ pr_err("pblk: pad write timed out\n");
+ ret = -ETIME;
+ }
+
+free_rq:
+ kfree(pad_rq);
+free_data:
+ vfree(data);
+ return ret;
+
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+ nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+ kfree(pad_rq);
+ return ret;
}
/* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
rec_round = 0;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -510,6 +583,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
if (pad_secs > line->left_msecs)
pad_secs = line->left_msecs;
- ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+ ret = pblk_recov_pad_oob(pblk, line, pad_secs);
if (ret)
pr_err("pblk: OOB padding failed (err:%d)\n", ret);
@@ -552,7 +626,6 @@ next_rq:
if (ret)
pr_err("pblk: OOB read failed (err:%d)\n", ret);
- line->left_ssecs = line->left_msecs;
left_ppas = 0;
}
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
*done = 1;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -646,6 +723,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
/* Roll back failed sectors */
line->cur_sec -= nr_error_bits;
line->left_msecs += nr_error_bits;
- line->left_ssecs = line->left_msecs;
bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line, *tline, *data_line = NULL;
- struct line_smeta *smeta;
- struct line_emeta *emeta;
+ struct pblk_smeta *smeta;
+ struct pblk_emeta *emeta;
+ struct line_smeta *smeta_buf;
int found_lines = 0, recovered_lines = 0, open_lines = 0;
int is_next = 0;
int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
spin_lock(&l_mg->free_lock);
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line].meta;
- emeta = l_mg->eline_meta[meta_line].meta;
+ smeta = l_mg->sline_meta[meta_line];
+ emeta = l_mg->eline_meta[meta_line];
+ smeta_buf = (struct line_smeta *)smeta;
spin_unlock(&l_mg->free_lock);
/* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
memset(smeta, 0, lm->smeta_len);
line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta)) +
+ line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_read_smeta(pblk, line))
continue;
- crc = pblk_calc_smeta_crc(pblk, smeta);
- if (le32_to_cpu(smeta->crc) != crc)
+ crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+ if (le32_to_cpu(smeta_buf->crc) != crc)
continue;
- if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
- if (le16_to_cpu(smeta->header.version) != 1) {
+ if (le16_to_cpu(smeta_buf->header.version) != 1) {
pr_err("pblk: found incompatible line version %u\n",
- smeta->header.version);
+ smeta_buf->header.version);
return ERR_PTR(-EINVAL);
}
/* The first valid instance uuid is used for initialization */
if (!valid_uuid) {
- memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+ memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
valid_uuid = 1;
}
- if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+ if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
pr_debug("pblk: ignore line %u due to uuid mismatch\n",
i);
continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
/* Update line metadata */
spin_lock(&line->lock);
- line->id = le32_to_cpu(line->smeta->header.id);
- line->type = le16_to_cpu(line->smeta->header.type);
- line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+ line->id = le32_to_cpu(smeta_buf->header.id);
+ line->type = le16_to_cpu(smeta_buf->header.type);
+ line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
spin_unlock(&line->lock);
/* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
pblk_recov_line_add_ordered(&recov_list, line);
found_lines++;
pr_debug("pblk: recovering data line %d, seq:%llu\n",
- line->id, smeta->seq_nr);
+ line->id, smeta_buf->seq_nr);
}
if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
recovered_lines++;
/* Calculate where emeta starts based on the line bb */
- off = lm->sec_per_line - lm->emeta_sec;
+ off = lm->sec_per_line - lm->emeta_sec[0];
nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
off -= nr_bb * geo->sec_per_pl;
- memset(emeta, 0, lm->emeta_len);
- line->emeta = emeta;
line->emeta_ssec = off;
+ line->emeta = emeta;
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
- if (pblk_line_read_emeta(pblk, line)) {
+ if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
@@ -941,58 +1020,26 @@ out:
}
/*
- * Pad until smeta can be read on current data line
+ * Pad current line
*/
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line *line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_rq *rqd;
- struct pblk_recov_alloc p;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
+ int left_msecs;
+ int ret = 0;
spin_lock(&l_mg->free_lock);
line = l_mg->data_line;
+ left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);
- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd))
- return;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list)
- goto free_rqd;
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
- if (!data)
- goto free_meta_list;
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
- pr_err("pblk: Tear down padding failed\n");
- goto free_data;
+ ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+ if (ret) {
+ pr_err("pblk: Tear down padding failed (%d)\n", ret);
+ return ret;
}
- pblk_line_close(pblk, line);
-
-free_data:
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_line_close_meta(pblk, line);
+ return ret;
}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..2e6a5361baf0 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+ int rb_space;
+
+ rb_space = atomic_read(&rl->rb_space);
+
+ return (rb_space == 0);
+}
+
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+ int rb_space = atomic_read(&rl->rb_space);
- return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+ if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+ return NVM_IO_ERR;
+
+ if (rb_user_cnt >= rl->rb_user_max)
+ return NVM_IO_REQUEUE;
+
+ return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+ int rb_space = atomic_read(&rl->rb_space);
+
+ if (unlikely(rb_space >= 0))
+ atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+ return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
if (free_blocks >= rl->high) {
- rl->rb_user_max = max - rl->rb_gc_rsv;
- rl->rb_gc_max = rl->rb_gc_rsv;
+ rl->rb_user_max = max;
+ rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
- int gc_max;
rl->rb_user_max = user_max;
- gc_max = max - rl->rb_user_max;
- rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
- if (free_blocks > rl->low)
- rl->rb_state = PBLK_RL_MID;
- else
- rl->rb_state = PBLK_RL_LOW;
+ rl->rb_gc_max = max - user_max;
+
+ if (free_blocks <= rl->rsv_blocks) {
+ rl->rb_user_max = 0;
+ rl->rb_gc_max = max;
+ }
+
+ /* In the worst case, we will need to GC lines in the low list
+ * (high valid sector count). If there are lines to GC on high
+ * or mid lists, these will be prioritized
+ */
+ rl->rb_state = PBLK_RL_LOW;
}
return rl->rb_state;
}
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
- rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
int blk_in_line = atomic_read(&line->blk_in_line);
- int ret;
atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+ int ret;
/* Rates will not change that often - no need to lock update */
ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
pblk_gc_should_stop(pblk);
}
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+ return rl->low;
+}
+
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
{
return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
void pblk_rl_init(struct pblk_rl *rl, int budget)
{
+ struct pblk *pblk = container_of(rl, struct pblk, rl);
+ struct pblk_line_meta *lm = &pblk->lm;
+ int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
unsigned int rb_windows;
rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
- rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
rl->high_pw = get_count_order(rl->high);
+ rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+ if (rl->low < min_blocks)
+ rl->low = min_blocks;
+
+ rl->rsv_blocks = min_blocks;
+
/* This will always be a power-of-2 */
rb_windows = budget / PBLK_MAX_REQ_ADDRS;
- rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+ rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
- atomic_set(&rl->rb_user_cnt, 0);
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
+
+ atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
+ atomic_set(&rl->rb_space, -1);
setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
rl->rb_user_active = 0;
+ rl->rb_gc_active = 0;
}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1ceeff..95fb434e2f01 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
int free_blocks, total_blocks;
int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+ int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
free_blocks = atomic_read(&pblk->rl.free_blocks);
rb_user_max = pblk->rl.rb_user_max;
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_rsv = pblk->rl.rb_gc_rsv;
rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
rb_budget = pblk->rl.rb_budget;
rb_state = pblk->rl.rb_state;
- total_blocks = geo->blks_per_lun * geo->nr_luns;
+ total_blocks = pblk->rl.total_blocks;
return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+ "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
rb_user_cnt,
rb_user_max,
rb_gc_cnt,
rb_gc_max,
- rb_gc_rsv,
rb_state,
rb_budget,
pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
ssize_t sz = 0;
int nr_free_lines;
int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0;
+ int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int free = 0, bad = 0, cor = 0;
- int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+ int bad = 0, cor = 0;
+ int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
free_line_cnt++;
spin_unlock(&l_mg->free_lock);
+ spin_lock(&l_mg->close_lock);
+ list_for_each_entry(line, &l_mg->emeta_list, list)
+ emeta_line_cnt++;
+ spin_unlock(&l_mg->close_lock);
+
spin_lock(&l_mg->gc_lock);
list_for_each_entry(line, &l_mg->gc_full_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
gc_empty++;
}
- list_for_each_entry(line, &l_mg->free_list, list)
- free++;
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
if (l_mg->data_line) {
cur_sec = l_mg->data_line->cur_sec;
msecs = l_mg->data_line->left_msecs;
- ssecs = l_mg->data_line->left_ssecs;
- vsc = l_mg->data_line->vsc;
+ vsc = le32_to_cpu(*l_mg->data_line->vsc);
sec_in_line = l_mg->data_line->sec_in_line;
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
spin_unlock(&l_mg->free_lock);
if (nr_free_lines != free_line_cnt)
- pr_err("pblk: corrupted free line list\n");
+ pr_err("pblk: corrupted free line list:%d/%d\n",
+ nr_free_lines, free_line_cnt);
sz = snprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+ "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
- free, nr_free_lines, bad, cor,
+ nr_free_lines,
+ emeta_line_cnt, meta_weight,
closed_line_cnt,
+ bad, cor,
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
atomic_read(&pblk->gc.inflight_gc));
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line, meta_weight);
+ "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+ cur_data, cur_sec, msecs, vsc, sec_in_line,
+ map_weight, lm->sec_per_line,
+ atomic_read(&pblk->inflight_io));
return sz;
}
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
lm->smeta_len, lm->smeta_sec);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len, lm->emeta_sec,
+ lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
return sz;
}
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->compl_writes),
atomic_long_read(&pblk->recov_writes),
atomic_long_read(&pblk->recov_gc_writes),
atomic_long_read(&pblk->recov_gc_reads),
+ atomic_long_read(&pblk->cache_reads),
atomic_long_read(&pblk->sync_reads));
}
#endif
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+ size_t len)
{
- struct pblk_gc *gc = &pblk->gc;
size_t c_len;
- int value;
+ int force;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &value))
+ if (kstrtouint(page, 0, &force))
return -EINVAL;
- spin_lock(&gc->lock);
- pblk_rl_set_gc_rsc(&pblk->rl, value);
- spin_unlock(&gc->lock);
+ pblk_gc_sysfs_force(pblk, force);
return len;
}
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+ const char *page, size_t len)
{
size_t c_len;
- int force;
+ int sec_per_write;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &force))
+ if (kstrtouint(page, 0, &sec_per_write))
return -EINVAL;
- if (force < 0 || force > 1)
+ if (sec_per_write < pblk->min_write_pgs
+ || sec_per_write > pblk->max_write_pgs
+ || sec_per_write % pblk->min_write_pgs != 0)
return -EINVAL;
- pblk_gc_sysfs_force(pblk, force);
+ pblk_set_sec_per_write(pblk, sec_per_write);
return len;
}
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
.mode = 0200,
};
-static struct attribute sys_gc_rl_max = {
- .name = "gc_rl_max",
- .mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+ .name = "max_sec_per_write",
+ .mode = 0644,
};
#ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
&sys_errors_attr,
&sys_gc_state,
&sys_gc_force,
- &sys_gc_rl_max,
+ &sys_max_sec_per_write,
&sys_rb_attr,
&sys_stats_ppaf_attr,
&sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_lines(pblk, buf);
else if (strcmp(attr->name, "lines_info") == 0)
return pblk_sysfs_lines_info(pblk, buf);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_get_sec_per_write(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
{
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
- if (strcmp(attr->name, "gc_rl_max") == 0)
- return pblk_sysfs_rate_store(pblk, buf, len);
- else if (strcmp(attr->name, "gc_force") == 0)
+ if (strcmp(attr->name, "gc_force") == 0)
return pblk_sysfs_gc_force(pblk, buf, len);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_set_sec_per_write(pblk, buf, len);
return 0;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..d62a8f4faaf4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
#include "pblk.h"
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_inc(&pblk->sync_writes);
-#endif
-
- /* Counter protected by rb sync lock */
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
for (i = 0; i < c_ctx->nr_valid; i++) {
struct pblk_w_ctx *w_ctx;
- struct ppa_addr p;
- struct pblk_line *line;
w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
- p = rqd->ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(p)];
- pblk_sync_line(pblk, line);
-
while ((original_bio = bio_list_pop(&w_ctx->bios)))
bio_endio(original_bio);
}
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+ atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
}
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->kw_wq, &recovery->ws_rec);
+ queue_work(pblk->close_wq, &recovery->ws_rec);
out:
pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
}
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif
pblk_complete_write(pblk, rqd, c_ctx);
+ atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+ struct pblk *pblk = rqd->private;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_line *line = m_ctx->private;
+ struct pblk_emeta *emeta = line->emeta;
+ int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+ struct pblk_lun *rlun = &pblk->luns[pos];
+ int sync;
+
+ up(&rlun->wr_sem);
+
+ if (rqd->error) {
+ pblk_log_write_err(pblk, rqd);
+ pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+ }
+#ifdef CONFIG_NVM_DEBUG
+ else
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
+ sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+ if (sync == emeta->nr_entries)
+ pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+ pblk->close_wq);
+
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, READ);
+
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs)
+ unsigned int nr_secs,
+ nvm_end_io_fn(*end_io))
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
rqd->nr_ppas = nr_secs;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
rqd->private = pblk;
- rqd->end_io = pblk_end_io_write;
+ rqd->end_io = end_io;
rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
&rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
+ struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
- struct ppa_addr erase_ppa;
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
int ret = 0;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!lun_bitmap)
+ return -ENOMEM;
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+ ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
if (ret) {
kfree(lun_bitmap);
- goto out;
+ return ret;
}
- ppa_set_empty(&erase_ppa);
if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
else
pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, &erase_ppa);
-
-out:
- if (unlikely(e_line && !ppa_empty(erase_ppa))) {
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- up(&pblk->erase_sem);
- }
- }
+ valid, erase_ppa);
- return ret;
+ return 0;
}
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+ ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
if (ret)
return ret;
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
return secs_to_sync;
}
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line *data_line;
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int i;
+
+ data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+ paddr = pblk_lookup_page(pblk, meta_line);
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+ return 1;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regards to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result on a LUN deadlock. In this case, modify
+ * the distance to not be optimal, but allow metadata I/Os to succeed.
+ */
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+ data_line->meta_distance--;
+ return 0;
+ }
+
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+ ppa_list[i].g.lun == ppa_opt.g.lun)
+ return 1;
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa.g.ch &&
+ ppa_list[i].g.lun == ppa.g.lun)
+ return 0;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = meta_line->emeta;
+ struct pblk_g_ctx *m_ctx;
+ struct pblk_lun *rlun;
+ struct bio *bio;
+ struct nvm_rq *rqd;
+ void *data;
+ u64 paddr;
+ int rq_ppas = pblk->min_write_pgs;
+ int id = meta_line->id;
+ int rq_len;
+ int i, j;
+ int ret;
+
+ rqd = pblk_alloc_rqd(pblk, READ);
+ if (IS_ERR(rqd)) {
+ pr_err("pblk: cannot allocate write req.\n");
+ return PTR_ERR(rqd);
+ }
+ m_ctx = nvm_rq_to_pdu(rqd);
+ m_ctx->private = meta_line;
+
+ rq_len = rq_ppas * geo->sec_size;
+ data = ((void *)emeta->buf) + emeta->mem;
+
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
+ bio->bi_iter.bi_sector = 0; /* internal bio */
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ rqd->bio = bio;
+
+ ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+ if (ret)
+ goto fail_free_bio;
+
+ for (i = 0; i < rqd->nr_ppas; ) {
+ spin_lock(&meta_line->lock);
+ paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+ spin_unlock(&meta_line->lock);
+ for (j = 0; j < rq_ppas; j++, i++, paddr++)
+ rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+ }
+
+ rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+ ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+ if (ret) {
+ pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+ goto fail_free_bio;
+ }
+
+ emeta->mem += rq_len;
+ if (emeta->mem >= lm->emeta_len[0]) {
+ spin_lock(&l_mg->close_lock);
+ list_del(&meta_line->list);
+ WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+ "pblk: corrupt meta line %d\n", meta_line->id);
+ spin_unlock(&l_mg->close_lock);
+ }
+
+ ret = pblk_submit_io(pblk, rqd);
+ if (ret) {
+ pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+ goto fail_rollback;
+ }
+
+ return NVM_IO_OK;
+
+fail_rollback:
+ spin_lock(&l_mg->close_lock);
+ pblk_dealloc_page(pblk, meta_line, rq_ppas);
+ list_add(&meta_line->list, &meta_line->list);
+ spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+ if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, READ);
+ return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+ int prev_n)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line *meta_line;
+
+ spin_lock(&l_mg->close_lock);
+retry:
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return 0;
+ }
+ meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+ if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+ goto retry;
+ spin_unlock(&l_mg->close_lock);
+
+ if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+ return 0;
+
+ return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct ppa_addr erase_ppa;
+ int err;
+
+ ppa_set_empty(&erase_ppa);
+
+ /* Assign lbas to ppas and populate request structure */
+ err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ if (err) {
+ pr_err("pblk: could not setup write request: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ if (likely(ppa_empty(erase_ppa))) {
+ /* Submit metadata write for previous data line */
+ err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+ } else {
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit available erase for next data line */
+ if (pblk_blk_erase_async(pblk, erase_ppa)) {
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int bit;
+
+ atomic_inc(&e_line->left_eblks);
+ bit = pblk_ppa_to_pos(geo, erase_ppa);
+ WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+ }
+ }
+
+ return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct bio *bio = rqd->bio;
+
+ if (c_ctx->nr_padded)
+ pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
static int pblk_submit_write(struct pblk *pblk)
{
struct bio *bio;
struct nvm_rq *rqd;
- struct pblk_c_ctx *c_ctx;
- unsigned int pgs_read;
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush;
unsigned long pos;
- int err;
/* If there are no sectors in the cache, flushes (bios without data)
* will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
pr_err("pblk: cannot allocate write req.\n");
return 1;
}
- c_ctx = nvm_rq_to_pdu(rqd);
bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
- secs_to_sync, secs_avail);
- if (!pgs_read) {
+ if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+ secs_avail)) {
pr_err("pblk: corrupted write bio\n");
goto fail_put_bio;
}
- if (c_ctx->nr_padded)
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
- goto fail_put_bio;
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx);
- if (err) {
- pr_err("pblk: could not setup write request\n");
- goto fail_free_bio;
- }
-
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: I/O submission failed: %d\n", err);
+ if (pblk_submit_io_set(pblk, rqd))
goto fail_free_bio;
- }
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
return 0;
fail_free_bio:
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+ pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
fail_free_rqd:
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186b5288..15931381348c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
#define PBLK_MAX_REQ_ADDRS (64)
#define PBLK_MAX_REQ_ADDRS_PW (6)
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
#define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
PBLK_BLK_ST_CLOSED = 0x2,
};
+struct pblk_sec_meta {
+ u64 reserved;
+ __le64 lba;
+};
+
/* The number of GC lists and the rate-limiter states go together. This way the
* rate-limiter can dictate how much GC is needed based on resource utilization.
*/
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
enum {
PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
PBLK_RL_LOW = 3,
};
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
-/* write completion context */
+/* write buffer completion context */
struct pblk_c_ctx {
struct list_head list; /* Head for out-of-order completion */
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
unsigned int nr_padded;
};
-/* Read context */
-struct pblk_r_ctx {
- struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+ void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+ struct pblk *pblk;
+ struct completion wait;
+ struct kref ref;
};
/* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
struct pblk_gc_rq {
struct pblk_line *line;
void *data;
- u64 *lba_list;
+ u64 lba_list[PBLK_MAX_REQ_ADDRS];
int nr_secs;
int secs_to_gc;
struct list_head list;
};
struct pblk_gc {
+ /* These states are not protected by a lock since (i) they are in the
+ * fast path, and (ii) they are not critical.
+ */
int gc_active;
int gc_enabled;
int gc_forced;
- int gc_jobs_active;
- atomic_t inflight_gc;
struct task_struct *gc_ts;
struct task_struct *gc_writer_ts;
+ struct task_struct *gc_reader_ts;
+
+ struct workqueue_struct *gc_line_reader_wq;
struct workqueue_struct *gc_reader_wq;
+
struct timer_list gc_timer;
+ struct semaphore gc_sem;
+ atomic_t inflight_gc;
int w_entries;
+
struct list_head w_list;
+ struct list_head r_list;
spinlock_t lock;
spinlock_t w_lock;
+ spinlock_t r_lock;
};
struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
*/
unsigned int high_pw; /* High rounded up as a power of 2 */
-#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent
- * available blks
- */
-#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
int rb_windows_pw; /* Number of rate windows in the write buffer
* given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
*/
int rb_budget; /* Total number of entries available for I/O */
int rb_user_max; /* Max buffer entries available for user I/O */
- atomic_t rb_user_cnt; /* User I/O buffer counter */
int rb_gc_max; /* Max buffer entries available for GC I/O */
int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
int rb_state; /* Rate-limiter current state */
+
+ atomic_t rb_user_cnt; /* User I/O buffer counter */
atomic_t rb_gc_cnt; /* GC I/O buffer counter */
+ atomic_t rb_space; /* Space limit in case of reaching capacity */
+
+ int rsv_blocks; /* Reserved blocks for GC */
int rb_user_active;
+ int rb_gc_active;
+
struct timer_list u_timer;
unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
atomic_t free_blocks;
};
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
#define PBLK_LINE_EMPTY (~0U)
enum {
@@ -310,16 +334,19 @@ struct line_smeta {
__le32 window_wr_lun; /* Number of parallel LUNs to write */
__le32 rsvd[2];
+
+ __le64 lun_bitmap[];
};
/*
- * Metadata Layout:
- * 1. struct pblk_emeta
- * 2. nr_lbas u64 forming lba list
- * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- * 4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- * 3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ * First sector:
+ * 1. struct line_emeta
+ * 2. bad block bitmap (u64 * window_wr_lun)
+ * Mid sectors (start at lbas_sector):
+ * 3. nr_lbas (u64) forming lba list
+ * Last sectors (start at vsc_sector):
+ * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
*/
struct line_emeta {
struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
__le32 next_id; /* Line id for next line */
__le64 nr_lbas; /* Number of lbas mapped in line */
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
+ __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+ struct line_emeta *buf; /* emeta buffer in media format */
+ int mem; /* Write offset - points to next
+ * writable entry in memory
+ */
+ atomic_t sync; /* Synced - backpointer that signals the
+ * last entry that has been successfully
+ * persisted to media
+ */
+ unsigned int nr_entries; /* Number of emeta entries */
+};
+
+struct pblk_smeta {
+ struct line_smeta *buf; /* smeta buffer in persistent format */
};
struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
- struct line_smeta *smeta; /* Start metadata */
- struct line_emeta *emeta; /* End metadata */
+ struct pblk_smeta *smeta; /* Start metadata */
+ struct pblk_emeta *emeta; /* End medatada */
+
int meta_line; /* Metadata line id */
+ int meta_distance; /* Distance between data and metadata */
+
u64 smeta_ssec; /* Sector where smeta starts */
u64 emeta_ssec; /* Sector where emeta starts */
@@ -374,9 +421,10 @@ struct pblk_line {
atomic_t left_seblks; /* Blocks left for sync erasing */
int left_msecs; /* Sectors left for mapping */
- int left_ssecs; /* Sectors left to sync */
unsigned int cur_sec; /* Sector map pointer */
- unsigned int vsc; /* Valid sector count in line */
+ unsigned int nr_valid_lbas; /* Number of valid lbas in line */
+
+ __le32 *vsc; /* Valid sector count in line */
struct kref ref; /* Write buffer L2P references */
@@ -385,13 +433,15 @@ struct pblk_line {
#define PBLK_DATA_LINES 4
-enum{
+enum {
PBLK_KMALLOC_META = 1,
PBLK_VMALLOC_META = 2,
};
-struct pblk_line_metadata {
- void *meta;
+enum {
+ PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
+ PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
+ PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
};
struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
struct list_head bad_list; /* Full lines bad */
/* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+ struct list_head *gc_lists[PBLK_GC_NR_LISTS];
struct list_head gc_high_list; /* Full lines ready to GC, high isc */
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
struct pblk_line *log_next; /* Next FTL log line */
struct pblk_line *data_next; /* Next data line */
+ struct list_head emeta_list; /* Lines queued to schedule emeta */
+
+ __le32 *vsc_list; /* Valid sector counts for all lines */
+
/* Metadata allocation type: VMALLOC | KMALLOC */
- int smeta_alloc_type;
int emeta_alloc_type;
/* Pre-allocated metadata for data lines */
- struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
- struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+ struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+ struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
unsigned long meta_bitmap;
/* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
unsigned long l_seq_nr; /* Log line unique sequence number */
spinlock_t free_lock;
+ spinlock_t close_lock;
spinlock_t gc_lock;
};
struct pblk_line_meta {
unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta*/
- unsigned int emeta_len; /* Total length for emeta */
- unsigned int emeta_sec; /* Sectors needed for emeta*/
+ unsigned int smeta_sec; /* Sectors needed for smeta */
+
+ unsigned int emeta_len[4]; /* Lengths for emeta:
+ * [0]: Total length
+ * [1]: struct line_emeta length
+ * [2]: L2P portion length
+ * [3]: vsc list length
+ */
+ unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
+ * as emeta_len
+ */
+
unsigned int emeta_bb; /* Boundary for bb that affects emeta */
+
+ unsigned int vsc_list_len; /* Length for vsc list */
unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
unsigned int blk_bitmap_len; /* Length for block bitmap in line */
unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
unsigned int blk_per_line; /* Number of blocks in a full line */
unsigned int sec_per_line; /* Number of sectors in a line */
+ unsigned int dsec_per_line; /* Number of data sectors in a line */
unsigned int min_blk_line; /* Min. number of good blocks in line */
unsigned int mid_thrs; /* Threshold for GC mid list */
unsigned int high_thrs; /* Threshold for GC high list */
+
+ unsigned int meta_distance; /* Distance between data and metadata */
};
struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
u8 sec_offset;
};
+enum {
+ PBLK_STATE_RUNNING = 0,
+ PBLK_STATE_STOPPING = 1,
+ PBLK_STATE_RECOVERING = 2,
+ PBLK_STATE_STOPPED = 3,
+};
+
struct pblk {
struct nvm_tgt_dev *dev;
struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
struct pblk_rb rwb;
+ int state; /* pblk line state */
+
int min_write_pgs; /* Minimum amount of pages required by controller */
int max_write_pgs; /* Maximum amount of pages supported by controller */
int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
/* pblk provisioning values. Used by rate limiter */
struct pblk_rl rl;
- struct semaphore erase_sem;
+ int sec_per_write;
unsigned char instance_uuid[16];
#ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
atomic_long_t req_writes; /* Sectors stored on write buffer */
atomic_long_t sub_writes; /* Sectors submitted from buffer */
atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t compl_writes; /* Sectors completed in write bio */
atomic_long_t inflight_reads; /* Inflight sector read requests */
+ atomic_long_t cache_reads; /* Read requests that hit the cache */
atomic_long_t sync_reads; /* Completed sector read requests */
atomic_long_t recov_writes; /* Sectors submitted from recovery */
atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
atomic_long_t write_failed;
atomic_long_t erase_failed;
+ atomic_t inflight_io; /* General inflight I/O counter */
+
struct task_struct *writer_ts;
/* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
mempool_t *page_pool;
mempool_t *line_ws_pool;
mempool_t *rec_pool;
- mempool_t *r_rq_pool;
+ mempool_t *g_rq_pool;
mempool_t *w_rq_pool;
mempool_t *line_meta_pool;
- struct workqueue_struct *kw_wq;
+ struct workqueue_struct *close_wq;
+ struct workqueue_struct *bb_wq;
+
struct timer_list wtimer;
struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
struct work_struct ws;
};
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
/*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
unsigned int pos);
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count);
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
struct list_head *list,
unsigned int max);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter);
+ struct ppa_addr ppa, int bio_iter);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
* pblk core
*/
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx);
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
void pblk_discard(struct pblk *pblk, struct bio *bio);
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask);
+ int alloc_type, gfp_t gfp_mask);
struct pblk_line *pblk_line_get(struct pblk *pblk);
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
int pblk_line_is_full(struct pblk_line *line);
void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
void pblk_line_mark_bb(struct work_struct *work);
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *));
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq);
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf);
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
void pblk_line_put(struct kref *ref);
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
void pblk_end_io_sync(struct nvm_rq *rqd);
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
int nr_pages);
void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr);
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
/*
* pblk read path
*/
+extern struct bio_set *pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
*/
void pblk_submit_rec(struct work_struct *work);
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
/*
* pblk gc
*/
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk);
void pblk_gc_should_start(struct pblk *pblk);
void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
/*
* pblk rate limiter
*/
void pblk_rl_init(struct pblk_rl *rl, int budget);
void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
/*
* pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
return c_ctx - sizeof(struct nvm_rq);
}
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+ return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+ return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
{
- return (emeta) + 1;
+ return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+ int vsc;
+
+ spin_lock(&line->lock);
+ vsc = le32_to_cpu(*line->vsc);
+ spin_unlock(&line->lock);
+
+ return vsc;
}
#define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
ppa_addr->ppa = ADDR_EMPTY;
}
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+ if (lppa.ppa == rppa.ppa)
+ return true;
+
+ return false;
+}
+
static inline int pblk_addr_in_cache(struct ppa_addr ppa)
{
return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
}
static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_smeta *smeta)
+ struct line_header *header)
{
u32 crc = ~(u32)0;
- crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+ crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
crc = crc32_le(crc, (unsigned char *)emeta +
sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len -
+ lm->emeta_len[0] -
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
return flags;
}
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+ PBLK_READ_RANDOM = 0,
+ PBLK_READ_SEQUENTIAL = 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int flags;
+
+ flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ if (type == PBLK_READ_SEQUENTIAL)
+ flags |= geo->plane_mode >> 1;
+
+ return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
{
- return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ return !(nr_secs % pblk->min_write_pgs);
}
#ifdef CONFIG_NVM_DEBUG
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
{
struct completion *waiting = bio->bi_private;
- if (bio->bi_error)
- pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
complete(waiting);
}
@@ -359,7 +359,7 @@ try:
goto finished;
}
wait_for_completion_io(&wait);
- if (bio->bi_error) {
+ if (bio->bi_status) {
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
@@ -385,7 +385,7 @@ try:
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
- if (bio->bi_error)
+ if (bio->bi_status)
goto finished;
bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
struct nvm_rq *rqd;
int err;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_op(bio) == REQ_OP_DISCARD) {
rrpc_discard(rrpc, bio);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c9a1a8..dee542fff68e 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
/* Forward declarations */
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+ blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+ const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e848ae4..866dcf78ff8e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
bch_submit_bbio(bio, b->c, &b->key, 0);
closure_sync(&cl);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct btree *b = container_of(cl, struct btree, io);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
- bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+ bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
closure_put(cl);
}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_clone(bio, GFP_NOIO);
+ check = bio_clone_kmalloc(bio, GFP_NOIO);
if (!check)
return;
check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88c0ce9..6a9b85095e7b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
/* IO errors */
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
{
/*
* The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
}
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct bbio *b = container_of(bio, struct bbio, bio);
struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
}
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct closure *cl = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53d5670..0352d05e495c 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
{
struct journal_write *w = bio->bi_private;
- cache_set_err_on(bio->bi_error, w->c, "journal io error");
+ cache_set_err_on(bio->bi_status, w->c, "journal io error");
closure_put(&w->c->journal.io);
}
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a907006d..f633b30c962e 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
struct moving_io *io = container_of(bio->bi_private,
struct moving_io, cl);
- if (bio->bi_error)
- io->op.error = bio->bi_error;
+ if (bio->bi_status)
+ io->op.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(io->op.c, &b->key, 0)) {
- io->op.error = -EINTR;
+ io->op.status = BLK_STS_IOERR;
}
- bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+ bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
}
static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct data_insert_op *op = &io->op;
- if (!op->error) {
+ if (!op->status) {
moving_init(io);
io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc34369..019b3df9f1c6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
if (ret == -ESRCH) {
op->replace_collision = true;
} else if (ret) {
- op->error = -ENOMEM;
+ op->status = BLK_STS_RESOURCE;
op->insert_data_done = true;
}
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* TODO: We could try to recover from this. */
if (op->writeback)
- op->error = bio->bi_error;
+ op->status = bio->bi_status;
else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, op->wq);
else
set_closure_fn(cl, NULL, NULL);
}
- bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+ bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}
static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
* from the backing device.
*/
- if (bio->bi_error)
- s->iop.error = bio->bi_error;
+ if (bio->bi_status)
+ s->iop.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(s->iop.c, &b->key, 0)) {
atomic_long_inc(&s->iop.c->cache_read_races);
- s->iop.error = -EINTR;
+ s->iop.status = BLK_STS_IOERR;
}
- bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+ bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
}
/*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
- s->iop.error = bio->bi_error;
+ s->iop.status = bio->bi_status;
/* Only cache read errors are recoverable */
s->recoverable = false;
}
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
&s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
- s->orig_bio->bi_error = s->iop.error;
+ s->orig_bio->bi_status = s->iop.status;
bio_endio(s->orig_bio);
s->orig_bio = NULL;
}
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.inode = d->id;
s->iop.write_point = hash_long((unsigned long) current, 16);
s->iop.write_prio = 0;
- s->iop.error = 0;
+ s->iop.status = 0;
s->iop.flags = 0;
s->iop.flush_journal = op_is_flush(bio->bi_opf);
s->iop.wq = bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
- s->iop.error = 0;
+ s->iop.status = 0;
do_bio_hook(s, s->orig_bio);
/* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
!s->cache_miss, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
- if (s->iop.error)
+ if (s->iop.status)
continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
else if (s->iop.bio || verify(dc, &s->bio.bio))
continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff36875c2b3..7689176951ce 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
unsigned inode;
uint16_t write_point;
uint16_t write_prio;
- short error;
+ blk_status_t status;
union {
uint16_t flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e39168..8352fad765f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+ bch_count_io_errors(ca, bio->bi_status, "writing superblock");
closure_put(&ca->set->sb_write);
}
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
- cache_set_err_on(bio->bi_error, c, "accessing uuids");
+ cache_set_err_on(bio->bi_status, c, "accessing uuids");
bch_bbio_free(bio, c);
closure_put(cl);
}
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+ cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
bch_bbio_free(bio, ca->set);
closure_put(&ca->prio);
}
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
minor *= BCACHE_MINORS;
- if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
ida_simple_remove(&bcache_minor, minor);
return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sizeof(struct bbio) + sizeof(struct bio_vec) *
bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
- !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48b9235..42c66e76f05e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
struct keybuf_key *w = bio->bi_private;
struct dirty_io *io = w->private;
- if (bio->bi_error)
+ if (bio->bi_status)
SET_KEY_DIRTY(&w->key, false);
closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
struct dirty_io *io = w->private;
bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
- bio->bi_error, "reading dirty data from cache");
+ bio->bi_status, "reading dirty data from cache");
dirty_endio(bio);
}
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c30a57..82d27384d31f 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error)
+ struct dm_bio_prison_cell *cell, blk_status_t error)
{
struct bio_list bios;
struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
dm_cell_release(prison, cell, &bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac07e2c..cec52ac5e1ae 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error);
+ struct dm_bio_prison_cell *cell, blk_status_t error);
/*
* Visits the cell and then releases. Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 840c1496b2b1..850ff6c67994 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
enum data_mode data_mode;
unsigned char list_mode; /* LIST_* */
unsigned hold_count;
- int read_error;
- int write_error;
+ blk_status_t read_error;
+ blk_status_t write_error;
unsigned long state;
unsigned long last_accessed;
struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
{
struct dm_buffer *b = context;
- b->bio.bi_error = error ? -EIO : 0;
+ b->bio.bi_status = error ? BLK_STS_IOERR : 0;
b->bio.bi_end_io(&b->bio);
}
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
r = dm_io(&io_req, 1, &region, NULL);
if (r) {
- b->bio.bi_error = r;
+ b->bio.bi_status = errno_to_blk_status(r);
end_io(&b->bio);
}
}
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
static void inline_endio(struct bio *bio)
{
bio_end_io_t *end_fn = bio->bi_private;
- int error = bio->bi_error;
+ blk_status_t status = bio->bi_status;
/*
* Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
*/
bio_reset(bio);
- bio->bi_error = error;
+ bio->bi_status = status;
end_fn(bio);
}
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->write_error = bio->bi_error;
- if (unlikely(bio->bi_error)) {
+ b->write_error = bio->bi_status;
+ if (unlikely(bio->bi_status)) {
struct dm_bufio_client *c = b->c;
- int error = bio->bi_error;
- (void)cmpxchg(&c->async_write_error, 0, error);
+
+ (void)cmpxchg(&c->async_write_error, 0,
+ blk_status_to_errno(bio->bi_status));
}
BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->read_error = bio->bi_error;
+ b->read_error = bio->bi_status;
BUG_ON(!test_bit(B_READING, &b->state));
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
- int error = b->read_error;
+ int error = blk_status_to_errno(b->read_error);
dm_bufio_release(b);
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
*/
int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
{
- int a, f;
+ blk_status_t a;
+ int f;
unsigned long buffers_processed = 0;
struct dm_buffer *b, *tmp;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d682a0511381..c5ea03fc7ee1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
*/
struct continuation {
struct work_struct ws;
- int input;
+ blk_status_t input;
};
static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
/*
* The operation that everyone is waiting for.
*/
- int (*commit_op)(void *context);
+ blk_status_t (*commit_op)(void *context);
void *commit_context;
/*
@@ -171,8 +171,7 @@ struct batcher {
static void __commit(struct work_struct *_ws)
{
struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
- int r;
+ blk_status_t r;
unsigned long flags;
struct list_head work_items;
struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
while ((bio = bio_list_pop(&bios))) {
if (r) {
- bio->bi_error = r;
+ bio->bi_status = r;
bio_endio(bio);
} else
b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
}
static void batcher_init(struct batcher *b,
- int (*commit_op)(void *),
+ blk_status_t (*commit_op)(void *),
void *commit_context,
void (*issue_op)(struct bio *bio, void *),
void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error) {
+ if (bio->bi_status) {
bio_endio(bio);
return;
}
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
if (read_err || write_err)
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
queue_continuation(mg->cache->wq, &mg->k);
}
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error)
- mg->k.input = bio->bi_error;
+ if (bio->bi_status)
+ mg->k.input = bio->bi_status;
queue_continuation(mg->cache->wq, &mg->k);
}
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
if (mg->overwrite_bio) {
if (success)
force_set_dirty(cache, cblock);
+ else if (mg->k.input)
+ mg->overwrite_bio->bi_status = mg->k.input;
else
- mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+ mg->overwrite_bio->bi_status = BLK_STS_IOERR;
bio_endio(mg->overwrite_bio);
} else {
if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
r = copy(mg, is_policy_promote);
if (r) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
/*
* Used by the batcher.
*/
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
{
struct cache *cache = context;
if (dm_cache_changed_this_transaction(cache->cmd))
- return commit(cache, false);
+ return errno_to_blk_status(commit(cache, false));
return 0;
}
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
bio_list_init(&cache->deferred_bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
}
}
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r;
}
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct cache *cache = ti->private;
unsigned long flags;
@@ -2838,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
bio_drop_shared_lock(cache, bio);
accounted_complete(cache, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static int write_dirty_bitset(struct cache *cache)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72d479b..9e1b72e8f7ef 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
struct convert_context ctx;
atomic_t io_pending;
- int error;
+ blk_status_t error;
sector_t sector;
struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
*/
case -EBADMSG:
atomic_dec(&ctx->cc_pending);
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
/*
* There was an error while processing the request.
*/
default:
atomic_dec(&ctx->cc_pending);
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
struct bio *base_bio = io->base_bio;
- int error = io->error;
+ blk_status_t error = io->error;
if (!atomic_dec_and_test(&io->io_pending))
return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
else
kfree(io->integrity_metadata);
- base_bio->bi_error = error;
+ base_bio->bi_status = error;
bio_endio(base_bio);
}
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
struct dm_crypt_io *io = clone->bi_private;
struct crypt_config *cc = io->cc;
unsigned rw = bio_data_dir(clone);
- int error;
+ blk_status_t error;
/*
* free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
if (rw == WRITE)
crypt_free_buffer_pages(cc, clone);
- error = clone->bi_error;
+ error = clone->bi_status;
bio_put(clone);
if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
crypt_inc_pending(io);
if (kcryptd_io_read(io, GFP_NOIO))
- io->error = -ENOMEM;
+ io->error = BLK_STS_RESOURCE;
crypt_dec_pending(io);
}
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
sector_t sector;
struct rb_node **rbp, *parent;
- if (unlikely(io->error < 0)) {
+ if (unlikely(io->error)) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
struct bio *clone;
int crypt_finished;
sector_t sector = io->sector;
- int r;
+ blk_status_t r;
/*
* Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
if (unlikely(!clone)) {
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
goto dec;
}
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
- int r = 0;
+ blk_status_t r;
crypt_inc_pending(io);
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
io->sector);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (error == -EBADMSG) {
DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
(unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
- io->error = -EILSEQ;
+ io->error = BLK_STS_PROTECTION;
} else if (error < 0)
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- cc->bs = bioset_create(MIN_IOS, 0);
+ cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
* and is aligned to this size as defined in IO hints.
*/
if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
- return -EIO;
+ return DM_MAPIO_KILL;
if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a182611..3d04d5ce19d9 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
if (bio_data_dir(bio) == READ) {
if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
!test_bit(ERROR_WRITES, &fc->flags))
- return -EIO;
+ return DM_MAPIO_KILL;
goto map_bio;
}
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
/*
* By default, error all I/O.
*/
- return -EIO;
+ return DM_MAPIO_KILL;
}
map_bio:
@@ -358,12 +358,13 @@ map_bio:
return DM_MAPIO_REMAPPED;
}
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+ if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
/*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
* Error read during the down_interval if drop_writes
* and error_writes were not configured.
*/
- return -EIO;
+ *error = BLK_STS_IOERR;
}
}
- return error;
+ return DM_ENDIO_DONE;
}
static void flakey_status(struct dm_target *ti, status_type_t type,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 93b181088168..1b224aa9cf15 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
unsigned metadata_offset;
atomic_t in_flight;
- int bi_error;
+ blk_status_t bi_status;
struct completion *completion;
@@ -1118,8 +1118,8 @@ static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *
static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
int r = dm_integrity_failed(ic);
- if (unlikely(r) && !bio->bi_error)
- bio->bi_error = r;
+ if (unlikely(r) && !bio->bi_status)
+ bio->bi_status = errno_to_blk_status(r);
bio_endio(bio);
}
@@ -1127,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
{
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+ if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
submit_flush_bio(ic, dio);
else
do_endio(ic, bio);
@@ -1146,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->bi_error) && !bio->bi_error)
- bio->bi_error = dio->bi_error;
- if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+ if (unlikely(dio->bi_status) && !bio->bi_status)
+ bio->bi_status = dio->bi_status;
+ if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
dio->range.logical_sector += dio->range.n_sectors;
bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1322,7 +1322,7 @@ skip_io:
dec_in_flight(dio);
return;
error:
- dio->bi_error = r;
+ dio->bi_status = errno_to_blk_status(r);
dec_in_flight(dio);
}
@@ -1335,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
sector_t area, offset;
dio->ic = ic;
- dio->bi_error = 0;
+ dio->bi_status = 0;
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
submit_flush_bio(ic, dio);
@@ -1356,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
(unsigned long long)dio->range.logical_sector, bio_sectors(bio),
(unsigned long long)ic->provided_data_sectors);
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
ic->sectors_per_block,
(unsigned long long)dio->range.logical_sector, bio_sectors(bio));
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (ic->sectors_per_block > 1) {
@@ -1372,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
bv.bv_offset, bv.bv_len, ic->sectors_per_block);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
}
@@ -1387,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
wanted_tag_size *= ic->tag_size;
if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
} else {
if (unlikely(bip != NULL)) {
DMERR("Unexpected integrity data when using internal hash");
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
if (unlikely(ic->mode == 'R') && unlikely(dio->write))
- return -EIO;
+ return DM_MAPIO_KILL;
get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 8d5ca30f6551..25039607f3cb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
if (!client->pool)
goto bad;
- client->bios = bioset_create(min_ios, 0);
+ client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!client->bios)
goto bad;
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
fn(error_bits, context);
}
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
{
if (error)
set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
{
struct io *io;
unsigned region;
- int error;
+ blk_status_t error;
- if (bio->bi_error && bio_data_dir(bio) == READ)
+ if (bio->bi_status && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
/*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
*/
retrieve_io_and_region_from_bio(bio, &io, &region);
- error = bio->bi_error;
+ error = bio->bi_status;
bio_put(bio);
dec_count(io, region, error);
@@ -319,7 +320,7 @@ static void do_region(int op, int op_flags, unsigned region,
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
atomic_inc(&io->count);
- dec_count(io, region, -EOPNOTSUPP);
+ dec_count(io, region, BLK_STS_NOTSUPP);
return;
}
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe38655a49..a1da0eb58a93 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
{
struct log_writes_c *lc = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
unsigned long flags;
- DMERR("Error writing log block, error=%d", bio->bi_error);
+ DMERR("Error writing log block, error=%d", bio->bi_status);
spin_lock_irqsave(&lc->blocks_lock, flags);
lc->logging_enabled = false;
spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
INIT_LIST_HEAD(&block->list);
pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
return DM_MAPIO_REMAPPED;
}
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct log_writes_c *lc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
spin_unlock_irqrestore(&lc->blocks_lock, flags);
}
- return error;
+ return DM_ENDIO_DONE;
}
/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3df056b73b66..0e8ab5bb3575 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -559,13 +559,13 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
return DM_MAPIO_REQUEUE;
dm_report_EIO(m);
- return -EIO;
+ return DM_MAPIO_KILL;
}
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
- bio->bi_error = 0;
+ bio->bi_status = 0;
bio->bi_bdev = pgpath->path.dev->bdev;
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
@@ -621,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
- if (r < 0 || r == DM_MAPIO_REQUEUE) {
- bio->bi_error = r;
+ switch (r) {
+ case DM_MAPIO_KILL:
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
- } else if (r == DM_MAPIO_REMAPPED)
+ break;
+ case DM_MAPIO_REMAPPED:
generic_make_request(bio);
+ break;
+ }
}
blk_finish_plug(&plug);
}
@@ -1442,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
activate_or_offline_path(pgpath);
}
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
{
switch (error) {
- case -EBADE:
- /*
- * EBADE signals an reservation conflict.
- * We shouldn't fail the path here as we can communicate with
- * the target. We should failover to the next path, but in
- * doing so we might be causing a ping-pong between paths.
- * So just return the reservation conflict error.
- */
- case -EOPNOTSUPP:
- case -EREMOTEIO:
- case -EILSEQ:
- case -ENODATA:
- case -ENOSPC:
+ case BLK_STS_NOTSUPP:
+ case BLK_STS_NOSPC:
+ case BLK_STS_TARGET:
+ case BLK_STS_NEXUS:
+ case BLK_STS_MEDIUM:
+ case BLK_STS_RESOURCE:
return 1;
}
@@ -1466,7 +1467,7 @@ static int noretry_error(int error)
}
static int multipath_end_io(struct dm_target *ti, struct request *clone,
- int error, union map_info *map_context)
+ blk_status_t error, union map_info *map_context)
{
struct dm_mpath_io *mpio = get_mpio(map_context);
struct pgpath *pgpath = mpio->pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
- if (error == -EIO)
+ if (error == BLK_STS_IOERR)
dm_report_EIO(m);
/* complete with the original error */
r = DM_ENDIO_DONE;
@@ -1510,24 +1511,26 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
return r;
}
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
- int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+ blk_status_t *error)
{
+ struct multipath *m = ti->private;
+ struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct pgpath *pgpath = mpio->pgpath;
unsigned long flags;
+ int r = DM_ENDIO_DONE;
- if (!error)
- return 0; /* I/O complete */
-
- if (noretry_error(error))
- return error;
+ if (!*error || noretry_error(*error))
+ goto done;
- if (mpio->pgpath)
- fail_path(mpio->pgpath);
+ if (pgpath)
+ fail_path(pgpath);
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
dm_report_EIO(m);
- return -EIO;
+ *error = BLK_STS_IOERR;
+ goto done;
}
/* Queue for the daemon to resubmit */
@@ -1539,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
- return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
- struct multipath *m = ti->private;
- struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
- struct pgpath *pgpath;
- struct path_selector *ps;
- int r;
-
- BUG_ON(!mpio);
-
- r = do_end_io_bio(m, clone, error, mpio);
- pgpath = mpio->pgpath;
+ r = DM_ENDIO_INCOMPLETE;
+done:
if (pgpath) {
- ps = &pgpath->pg->ps;
+ struct path_selector *ps = &pgpath->pg->ps;
+
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4da8858856fb..a4fbd911d566 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -491,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
* If device is suspended, complete the bio.
*/
if (dm_noflush_suspending(ms->ti))
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
else
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
@@ -627,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
* degrade the array.
*/
if (bio_op(bio) == REQ_OP_DISCARD) {
- bio->bi_error = -EOPNOTSUPP;
+ bio->bi_status = BLK_STS_NOTSUPP;
bio_endio(bio);
return;
}
@@ -1210,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
if (r < 0 && r != -EWOULDBLOCK)
- return r;
+ return DM_MAPIO_KILL;
/*
* If region is not in-sync queue the bio.
*/
if (!r || (r == -EWOULDBLOCK)) {
if (bio->bi_opf & REQ_RAHEAD)
- return -EWOULDBLOCK;
+ return DM_MAPIO_KILL;
queue_bio(ms, bio, rw);
return DM_MAPIO_SUBMITTED;
@@ -1229,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
*/
m = choose_mirror(ms, bio->bi_iter.bi_sector);
if (unlikely(!m))
- return -EIO;
+ return DM_MAPIO_KILL;
dm_bio_record(&bio_record->details, bio);
bio_record->m = m;
@@ -1239,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
int rw = bio_data_dir(bio);
struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1255,16 +1256,16 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
if (!(bio->bi_opf & REQ_PREFLUSH) &&
bio_op(bio) != REQ_OP_DISCARD)
dm_rh_dec(ms->rh, bio_record->write_region);
- return error;
+ return DM_ENDIO_DONE;
}
- if (error == -EOPNOTSUPP)
+ if (*error == BLK_STS_NOTSUPP)
goto out;
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
+ if (bio->bi_opf & REQ_RAHEAD)
goto out;
- if (unlikely(error)) {
+ if (unlikely(*error)) {
if (!bio_record->details.bi_bdev) {
/*
* There wasn't enough memory to record necessary
@@ -1272,7 +1273,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
* mirror in-sync.
*/
DMERR_LIMIT("Mirror read failed.");
- return -EIO;
+ return DM_ENDIO_DONE;
}
m = bio_record->m;
@@ -1291,7 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
dm_bio_restore(bd, bio);
bio_record->details.bi_bdev = NULL;
- bio->bi_error = 0;
+ bio->bi_status = 0;
queue_bio(ms, bio, rw);
return DM_ENDIO_INCOMPLETE;
@@ -1302,7 +1303,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
out:
bio_record->details.bi_bdev = NULL;
- return error;
+ return DM_ENDIO_DONE;
}
static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7246ee..c6ebc5b1e00e 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
static void dm_mq_start_queue(struct request_queue *q)
{
- blk_mq_start_stopped_hw_queues(q, true);
+ blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
struct dm_rq_target_io *tio = info->tio;
struct bio *bio = info->orig;
unsigned int nr_bytes = info->orig->bi_iter.bi_size;
- int error = clone->bi_error;
+ blk_status_t error = clone->bi_status;
bio_put(clone);
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
* Do not use blk_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
- blk_update_request(tio->orig, 0, nr_bytes);
+ blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
}
static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
* Must be called without clone's queue lock held,
* see end_clone_request() for more details.
*/
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
{
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
rq_completed(md, rw, false);
}
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
int r = DM_ENDIO_DONE;
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
- if (unlikely(error == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (req_op(clone) == REQ_OP_WRITE_SAME &&
!clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
* Complete the clone and the original request with the error status
* through softirq context.
*/
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
{
struct dm_rq_target_io *tio = tio_from_request(rq);
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
* Target's rq_end_io() function isn't called.
* This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
*/
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
rq->rq_flags |= RQF_FAILED;
dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
/*
* Called with the clone's queue lock held (in the case of .request_fn)
*/
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
- int r;
+ blk_status_t r;
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
break;
case DM_MAPIO_KILL:
/* The target wants to complete the I/O */
- dm_kill_unmapped_request(rq, -EIO);
+ dm_kill_unmapped_request(rq, BLK_STS_IOERR);
break;
default:
DMWARN("unimplemented target map return value: %d", r);
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
return __dm_rq_init_rq(set->driver_data, rq);
}
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
}
if (ti->type->busy && ti->type->busy(ti))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
dm_start_request(md, rq);
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false);
blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops dm_mq_ops = {
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d21b95f..9813922e4fe5 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
struct dm_target *ti;
struct request *orig, *clone;
struct kthread_work work;
- int error;
+ blk_status_t error;
union map_info info;
struct dm_stats_aux stats_aux;
unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d9817c81..1ba41048b438 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
{
void *callback_data = bio->bi_private;
- dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+ dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
}
static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
if (!s->valid)
- return -EIO;
+ return DM_MAPIO_KILL;
/* FIXME: should only take write lock if we need
* to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid || (unlikely(s->snapshot_overflowed) &&
bio_data_dir(bio) == WRITE)) {
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid || s->snapshot_overflowed) {
free_pending_exception(pe);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
DMERR("Snapshot overflowed: Unable to allocate exception.");
} else
__invalidate_snapshot(s, -ENOMEM);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
}
@@ -1851,14 +1851,15 @@ out_unlock:
return r;
}
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct dm_snapshot *s = ti->private;
if (is_bio_tracked(bio))
stop_tracking_chunk(s, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 75152482f3ad..11621a0af887 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
}
}
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
unsigned i;
char major_minor[16];
struct stripe_c *sc = ti->private;
- if (!error)
- return 0; /* I/O complete */
+ if (!*error)
+ return DM_ENDIO_DONE; /* I/O complete */
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ if (bio->bi_opf & REQ_RAHEAD)
+ return DM_ENDIO_DONE;
- if (error == -EOPNOTSUPP)
- return error;
+ if (*error == BLK_STS_NOTSUPP)
+ return DM_ENDIO_DONE;
memset(major_minor, 0, sizeof(major_minor));
sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
schedule_work(&sc->trigger_event);
}
- return error;
+ return DM_ENDIO_DONE;
}
static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b750542f..c0d7e60820c4 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
static int io_err_map(struct dm_target *tt, struct bio *bio)
{
- return -EIO;
+ return DM_MAPIO_KILL;
}
static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 28808e5ec0fd..9dec2f8cc739 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
* Even if r is set, there could be sub discards in flight that we
* need to wait for.
*/
- if (r && !op->parent_bio->bi_error)
- op->parent_bio->bi_error = r;
+ if (r && !op->parent_bio->bi_status)
+ op->parent_bio->bi_status = errno_to_blk_status(r);
bio_endio(op->parent_bio);
}
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
}
static void cell_error_with_code(struct pool *pool,
- struct dm_bio_prison_cell *cell, int error_code)
+ struct dm_bio_prison_cell *cell, blk_status_t error_code)
{
dm_cell_error(pool->prison, cell, error_code);
dm_bio_prison_free_cell(pool->prison, cell);
}
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
{
- return pool->out_of_data_space ? -ENOSPC : -EIO;
+ return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
}
static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- int error = get_pool_io_error_code(pool);
-
- cell_error_with_code(pool, cell, error);
+ cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
}
static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+ cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
}
/*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
bio_list_init(master);
}
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
struct bio *bio;
while ((bio = bio_list_pop(bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+ blk_status_t error)
{
struct bio_list bios;
unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
__merge_bio_list(&bios, &tc->retry_on_resume_list);
spin_unlock_irqrestore(&tc->lock, flags);
- error_bio_list(&bios, DM_ENDIO_REQUEUE);
+ error_bio_list(&bios, BLK_STS_DM_REQUEUE);
requeue_deferred_cells(tc);
}
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
{
struct thin_c *tc;
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
static void error_retry_list(struct pool *pool)
{
- int error = get_pool_io_error_code(pool);
-
- error_retry_list_with_code(pool, error);
+ error_retry_list_with_code(pool, get_pool_io_error_code(pool));
}
/*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
*/
atomic_t prepare_actions;
- int err;
+ blk_status_t status;
struct thin_c *tc;
dm_block_t virt_begin, virt_end;
dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
{
struct dm_thin_new_mapping *m = context;
- m->err = read_err || write_err ? -EIO : 0;
+ m->status = read_err || write_err ? BLK_STS_IOERR : 0;
complete_mapping_preparation(m);
}
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
bio->bi_end_io = m->saved_bi_end_io;
- m->err = bio->bi_error;
+ m->status = bio->bi_status;
complete_mapping_preparation(m);
}
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
struct bio *bio = m->bio;
int r;
- if (m->err) {
+ if (m->status) {
cell_error(pool, m->cell);
goto out;
}
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
spin_unlock_irqrestore(&tc->lock, flags);
}
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
{
enum pool_mode m = get_pool_mode(pool);
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
case PM_WRITE:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
- return -EIO;
+ return BLK_STS_IOERR;
case PM_OUT_OF_DATA_SPACE:
- return pool->pf.error_if_no_space ? -ENOSPC : 0;
+ return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
case PM_READ_ONLY:
case PM_FAIL:
- return -EIO;
+ return BLK_STS_IOERR;
default:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
- return -EIO;
+ return BLK_STS_IOERR;
}
}
static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
- int error = should_error_unserviceable_bio(pool);
+ blk_status_t error = should_error_unserviceable_bio(pool);
if (error) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
} else
retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
{
struct bio *bio;
struct bio_list bios;
- int error;
+ blk_status_t error;
error = should_error_unserviceable_bio(pool);
if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
unsigned count = 0;
if (tc->requeue_mode) {
- error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+ error_thin_bio_list(tc, &tc->deferred_bio_list,
+ BLK_STS_DM_REQUEUE);
return;
}
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
pool->pf.error_if_no_space = true;
notify_of_pool_mode_change_to_oods(pool);
- error_retry_list_with_code(pool, -ENOSPC);
+ error_retry_list_with_code(pool, BLK_STS_NOSPC);
}
}
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
thin_hook_bio(tc, bio);
if (tc->requeue_mode) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
return thin_bio_map(ti, bio);
}
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+ blk_status_t *err)
{
unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
if (h->cell)
cell_defer_no_holder(h->tc, h->cell);
- return 0;
+ return DM_ENDIO_DONE;
}
static void thin_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 1ec9b2c51c07..b46705ebf01f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
/*
* End one "io" structure with a given error.
*/
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
{
struct dm_verity *v = io->v;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
bio->bi_end_io = io->orig_bi_end_io;
- bio->bi_error = error;
+ bio->bi_status = status;
verity_fec_finish_io(io);
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
{
struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
- verity_finish_io(io, verity_verify_io(io));
+ verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}
static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
- if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
- verity_finish_io(io, bio->bi_error);
+ if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+ verity_finish_io(io, bio->bi_status);
return;
}
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
DMERR_LIMIT("unaligned io");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_end_sector(bio) >>
(v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
DMERR_LIMIT("io out of range");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_data_dir(bio) == WRITE)
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, ti->per_io_data_size);
io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11d8473..b65ca8dcfbdc 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
case REQ_OP_READ:
if (bio->bi_opf & REQ_RAHEAD) {
/* readahead of null bytes only wastes buffer cache */
- return -EIO;
+ return DM_MAPIO_KILL;
}
zero_fill_bio(bio);
break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
/* writes get silently dropped */
break;
default:
- return -EIO;
+ return DM_MAPIO_KILL;
}
bio_endio(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 37ccd73c79ec..402946035308 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
*/
struct dm_io {
struct mapped_device *md;
- int error;
+ blk_status_t status;
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
{
unsigned long flags;
- int io_error;
+ blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
spin_lock_irqsave(&io->endio_lock, flags);
- if (!(io->error > 0 && __noflush_suspending(md)))
- io->error = error;
+ if (!(io->status == BLK_STS_DM_REQUEUE &&
+ __noflush_suspending(md)))
+ io->status = error;
spin_unlock_irqrestore(&io->endio_lock, flags);
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->error == DM_ENDIO_REQUEUE) {
+ if (io->status == BLK_STS_DM_REQUEUE) {
/*
* Target requested pushing back the I/O.
*/
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
- io->error = -EIO;
+ io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- io_error = io->error;
+ io_error = io->status;
bio = io->bio;
end_io_acct(io);
free_io(md, io);
- if (io_error == DM_ENDIO_REQUEUE)
+ if (io_error == BLK_STS_DM_REQUEUE)
return;
if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_error = io_error;
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
static void clone_endio(struct bio *bio)
{
- int error = bio->bi_error;
- int r = error;
+ blk_status_t error = bio->bi_status;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (endio) {
- r = endio(tio->ti, bio, error);
- if (r < 0 || r == DM_ENDIO_REQUEUE)
- /*
- * error and requeue request are handled
- * in dec_pending().
- */
- error = r;
- else if (r == DM_ENDIO_INCOMPLETE)
- /* The target will handle the io */
- return;
- else if (r) {
- DMWARN("unimplemented target endio return value: %d", r);
- BUG();
- }
- }
-
- if (unlikely(r == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
!bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
disable_write_zeroes(md);
}
+ if (endio) {
+ int r = endio(tio->ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
+ /* The target will handle the io */
+ return;
+ default:
+ DMWARN("unimplemented target endio return value: %d", r);
+ BUG();
+ }
+ }
+
free_tio(tio);
dec_pending(io, error);
}
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
while ((bio = bio_list_pop(&list))) {
struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
+ if (unlikely(!bs) || bs == fs_bio_set ||
+ !bs->rescue_workqueue) {
bio_list_add(&current->bio_list[i], bio);
continue;
}
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
r = ti->type->map(ti, clone);
dm_offload_end(&o);
- if (r == DM_MAPIO_REMAPPED) {
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
-
trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev, sector);
-
generic_make_request(clone);
- } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
- /* error the io and bail out, or requeue it if needed */
- dec_pending(tio->io, r);
+ break;
+ case DM_MAPIO_KILL:
+ dec_pending(tio->io, BLK_STS_IOERR);
+ free_tio(tio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ dec_pending(tio->io, BLK_STS_DM_REQUEUE);
free_tio(tio);
- } else if (r != DM_MAPIO_SUBMITTED) {
+ break;
+ default:
DMWARN("unimplemented target map return value: %d", r);
BUG();
}
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
ci.map = map;
ci.md = md;
ci.io = alloc_io(md);
- ci.io->error = 0;
+ ci.io->status = 0;
atomic_set(&ci.io->io_count, 1);
ci.io->bio = bio;
ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
* Initialize aspects of queue that aren't relevant for blk-mq
*/
md->queue->backing_dev_info->congested_fn = dm_any_congested;
- blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}
static void cleanup_mapped_device(struct mapped_device *md)
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
BUG();
}
- pools->bs = bioset_create_nobvec(pool_size, front_pad);
+ pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
if (!pools->bs)
goto out;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 84e76ebac4d4..31bcbfb09fef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
static bool create_on_open = true;
/* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
*/
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
unsigned int sectors;
int cpu;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
}
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
- bio->bi_error = -EROFS;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
- if (bio->bi_error) {
- pr_err("md: super_written gets error=%d\n", bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("md: super_written gets error=%d\n", bio->bi_status);
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
submit_bio_wait(bio);
- ret = !bio->bi_error;
+ ret = !bio->bi_status;
bio_put(bio);
return ret;
}
@@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev)
}
if (mddev->bio_set == NULL) {
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!mddev->bio_set)
return -ENOMEM;
}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521d93e9..68d036e64041 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
{
struct bio *bio = mp_bh->master_bio;
struct mpconf *conf = mp_bh->mddev->private;
- bio->bi_error = err;
+ bio->bi_status = status;
bio_endio(bio);
mempool_free(mp_bh, conf->pool);
}
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
struct mpconf *conf = mp_bh->mddev->private;
struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
- if (!bio->bi_error)
+ if (!bio->bi_status)
multipath_end_bh_io(mp_bh, 0);
else if (!(bio->bi_opf & REQ_RAHEAD)) {
/*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector);
multipath_reschedule_retry(mp_bh);
} else
- multipath_end_bh_io(mp_bh, bio->bi_error);
+ multipath_end_bh_io(mp_bh, bio->bi_status);
rdev_dec_pending(rdev, conf->mddev);
}
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
- multipath_end_bh_io(mp_bh, -EIO);
+ multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e1a7e3d4c5e4..98ca2c1d3226 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
static void raid1_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = bio->bi_private;
struct r1conf *conf = r1_bio->mddev->private;
struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
/*
* 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
* or re-read if the read failed.
* We don't do much here, just schedule handling by raid1d
*/
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R1BIO_Uptodate, &r1_bio->state);
if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
static void end_sync_write(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = get_resync_r1bio(bio);
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
idx ++;
}
set_bit(R1BIO_Uptodate, &r1_bio->state);
- bio->bi_error = 0;
+ bio->bi_status = 0;
return 1;
}
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int size;
- int error;
+ blk_status_t status;
struct bio_vec *bi;
struct bio *b = r1_bio->bios[i];
struct resync_pages *rp = get_resync_pages(b);
if (b->bi_end_io != end_sync_read)
continue;
/* fixup the bio for reuse, but preserve errno */
- error = b->bi_error;
+ status = b->bi_status;
bio_reset(b);
- b->bi_error = error;
+ b->bi_status = status;
b->bi_vcnt = vcnt;
b->bi_iter.bi_size = r1_bio->sectors << 9;
b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
}
for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
- !r1_bio->bios[primary]->bi_error) {
+ !r1_bio->bios[primary]->bi_status) {
r1_bio->bios[primary]->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
int j;
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
- int error = sbio->bi_error;
+ blk_status_t status = sbio->bi_status;
struct page **ppages = get_resync_pages(pbio)->pages;
struct page **spages = get_resync_pages(sbio)->pages;
struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
if (sbio->bi_end_io != end_sync_read)
continue;
/* Now we can 'fixup' the error value */
- sbio->bi_error = 0;
+ sbio->bi_status = 0;
bio_for_each_segment_all(bi, sbio, j)
page_len[j] = bi->bv_len;
- if (!error) {
+ if (!status) {
for (j = vcnt; j-- ; ) {
if (memcmp(page_address(ppages[j]),
page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
if (j >= 0)
atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
- && !error)) {
+ && !status)) {
/* No need to write to this device. */
sbio->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io == NULL)
continue;
- if (!bio->bi_error &&
+ if (!bio->bi_status &&
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
}
- if (bio->bi_error &&
+ if (bio->bi_status &&
test_bit(R1BIO_WriteError, &r1_bio->state)) {
if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
md_error(conf->mddev, rdev);
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (!conf->r1bio_pool)
goto abort;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 797ed60abd5e..57a250fdbbcc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
struct r10conf *conf = r10_bio->mddev->private;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
static void raid10_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
struct bio *to_put = NULL;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
{
struct r10conf *conf = r10_bio->mddev->private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R10BIO_Uptodate, &r10_bio->state);
else
/* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
else
rdev = conf->mirrors[d].rdev;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (repl)
md_error(mddev, rdev);
else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
/* find the first device with a block */
for (i=0; i<conf->copies; i++)
- if (!r10_bio->devs[i].bio->bi_error)
+ if (!r10_bio->devs[i].bio->bi_status)
break;
if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tpages = get_resync_pages(tbio)->pages;
d = r10_bio->devs[i].devnum;
rdev = conf->mirrors[d].rdev;
- if (!r10_bio->devs[i].bio->bi_error) {
+ if (!r10_bio->devs[i].bio->bi_status) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
continue;
- if (!r10_bio->devs[m].bio->bi_error) {
+ if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (r10_bio->devs[m].repl_bio == NULL)
continue;
- if (!r10_bio->devs[m].repl_bio->bi_error) {
+ if (!r10_bio->devs[m].repl_bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev);
- } else if (bio != NULL && bio->bi_error) {
+ } else if (bio != NULL && bio->bi_status) {
fail = true;
if (!narrow_write_error(r10_bio, m)) {
md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
bio = r10_bio->devs[i].bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
rcu_read_lock();
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
sector = r10_bio->devs[i].addr;
bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
- bio->bi_error = 0;
+ bio->bi_status = 0;
generic_make_request(bio);
}
}
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
if (!conf->r10bio_pool)
goto out;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto out;
@@ -4397,7 +4397,7 @@ read_more:
read_bio->bi_end_io = end_reshape_read;
bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
- read_bio->bi_error = 0;
+ read_bio->bi_status = 0;
read_bio->bi_vcnt = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
@@ -4641,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
rdev = conf->mirrors[d].rdev;
}
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* FIXME should record badblock */
md_error(mddev, rdev);
}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0a7af8b0a80a..bfa1e907c472 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
struct r5l_log *log = io->log;
unsigned long flags;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
unsigned long flags;
struct r5l_io_unit *io;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
spin_lock_irqsave(&log->io_list_lock, flags);
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log->io_pool)
goto io_pool;
- log->bs = bioset_create(R5L_POOL_SIZE, 0);
+ log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!log->bs)
goto io_bs;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index ccce92e68d7f..77cce3573aa8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
pr_debug("%s: seq: %llu\n", __func__, io->seq);
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(ppl_conf->mddev, log->rdev);
list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
goto err;
}
- ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+ ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
if (!ppl_conf->bs) {
ret = -ENOMEM;
goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ec0f951ae19f..62c965be97e1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
s = sh->sector + rdev->new_data_offset;
else
s = sh->sector + rdev->data_offset;
- if (!bi->bi_error) {
+ if (!bi->bi_status) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
/* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
}
pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
}
if (replacement) {
- if (bi->bi_error)
+ if (bi->bi_status)
md_error(conf->mddev, rdev);
else if (is_badblock(rdev, sh->sector,
STRIPE_SECTORS,
&first_bad, &bad_sectors))
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
- if (bi->bi_error) {
+ if (bi->bi_status) {
set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
}
rdev_dec_pending(rdev, conf->mddev);
- if (sh->batch_head && bi->bi_error && !replacement)
+ if (sh->batch_head && bi->bi_status && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
bio_endio(bi);
bi = nextbi;
}
@@ -5154,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
- int error = bi->bi_error;
+ blk_status_t error = bi->bi_status;
bio_put(bi);
@@ -5731,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
release_stripe_plug(mddev, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
break;
}
}
@@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
}
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
conf->mddev = mddev;
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c27fb7..22de7f5ed032 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
spin_lock_irqsave(&msb->q_lock, flags);
if (len)
- if (!__blk_end_request(msb->req, 0, len))
+ if (!__blk_end_request(msb->req, BLK_STS_OK, len))
msb->req = NULL;
if (error && msb->req) {
+ blk_status_t ret = errno_to_blk_status(error);
dbg_verbose("IO: ending one sector of the request with error");
- if (!__blk_end_request(msb->req, error, msb->page_size))
+ if (!__blk_end_request(msb->req, ret, msb->page_size))
msb->req = NULL;
}
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
WARN_ON(!msb->io_queue_stopped);
while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a266878..8897962781bb 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ try_again:
msb->req_sg);
if (!msb->seg_count) {
- chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+ chunk = __blk_end_request_cur(msb->block_req,
+ BLK_STS_RESOURCE);
continue;
}
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
if (error && !t_len)
t_len = blk_rq_cur_bytes(msb->block_req);
- chunk = __blk_end_request(msb->block_req, error, t_len);
+ chunk = __blk_end_request(msb->block_req,
+ errno_to_blk_status(error), t_len);
error = mspro_block_issue_req(card, chunk);
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
if (msb->eject) {
while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b078686d..6ff94a948a4b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_DISCARD;
+ blk_status_t status = BLK_STS_OK;
if (!mmc_can_erase(card)) {
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
goto fail;
}
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
if (!err)
err = mmc_erase(card, from, nr, arg);
} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
- if (!err)
+ if (err)
+ status = BLK_STS_IOERR;
+ else
mmc_blk_reset_success(md, type);
fail:
- blk_end_request(req, err, blk_rq_bytes(req));
+ blk_end_request(req, status, blk_rq_bytes(req));
}
static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_SECDISCARD;
+ blk_status_t status = BLK_STS_OK;
if (!(mmc_can_secure_erase_trim(card))) {
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
goto out;
}
@@ -1254,8 +1258,10 @@ retry:
err = mmc_erase(card, from, nr, arg);
if (err == -EIO)
goto out_retry;
- if (err)
+ if (err) {
+ status = BLK_STS_IOERR;
goto out;
+ }
if (arg == MMC_SECURE_TRIM1_ARG) {
if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
if (err == -EIO)
goto out_retry;
- if (err)
+ if (err) {
+ status = BLK_STS_IOERR;
goto out;
+ }
}
out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
if (!err)
mmc_blk_reset_success(md, type);
out:
- blk_end_request(req, err, blk_rq_bytes(req));
+ blk_end_request(req, status, blk_rq_bytes(req));
}
static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
int ret = 0;
ret = mmc_flush_cache(card);
- if (ret)
- ret = -EIO;
-
- blk_end_request_all(req, ret);
+ blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
}
/*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
{
if (mmc_card_removed(card))
req->rq_flags |= RQF_QUIET;
- while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+ while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
mmc_queue_req_free(mq, mqrq);
}
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
*/
if (mmc_card_removed(mq->card)) {
req->rq_flags |= RQF_QUIET;
- blk_end_request_all(req, -EIO);
+ blk_end_request_all(req, BLK_STS_IOERR);
mmc_queue_req_free(mq, mqrq);
return;
}
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
*/
mmc_blk_reset_success(md, type);
- req_pending = blk_end_request(old_req, 0,
+ req_pending = blk_end_request(old_req, BLK_STS_OK,
brq->data.bytes_xfered);
/*
* If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
* time, so we only reach here after trying to
* read a single sector.
*/
- req_pending = blk_end_request(old_req, -EIO,
+ req_pending = blk_end_request(old_req, BLK_STS_IOERR,
brq->data.blksz);
if (!req_pending) {
mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
ret = mmc_blk_part_switch(card, md);
if (ret) {
if (req) {
- blk_end_request_all(req, -EIO);
+ blk_end_request_all(req, BLK_STS_IOERR);
}
goto out;
}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6be3e7b..b659a28c8018 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
if (!mq) {
while ((req = blk_fetch_request(q)) != NULL) {
req->rq_flags |= RQF_QUIET;
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
return;
}
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
mmc_queue_setup_discard(mq->queue, card);
if (card->bouncesz) {
- blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
blk_queue_max_segments(mq->queue, card->bouncesz / 512);
blk_queue_max_segment_size(mq->queue, card->bouncesz);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd7dbf6..f336a9b85576 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
}
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
struct mtd_blktrans_dev *dev,
struct request *req)
{
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
buf = bio_data(req->bio);
- if (req_op(req) == REQ_OP_FLUSH)
- return tr->flush(dev);
+ if (req_op(req) == REQ_OP_FLUSH) {
+ if (tr->flush(dev))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
+ }
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
get_capacity(req->rq_disk))
- return -EIO;
+ return BLK_STS_IOERR;
switch (req_op(req)) {
case REQ_OP_DISCARD:
- return tr->discard(dev, block, nsect);
+ if (tr->discard(dev, block, nsect))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
case REQ_OP_READ:
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
- return -EIO;
+ return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
- return 0;
+ return BLK_STS_OK;
case REQ_OP_WRITE:
if (!tr->writesect)
- return -EIO;
+ return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
- return -EIO;
- return 0;
+ return BLK_STS_IOERR;
default:
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
spin_lock_irq(rq->queue_lock);
while (1) {
- int res;
+ blk_status_t res;
dev->bg_stop = false;
if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
if (!dev)
while ((req = blk_fetch_request(rq)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
else
queue_work(dev->wq, &dev->work);
}
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
new->rq->queuedata = new;
blk_queue_logical_block_size(new->rq, tr->blksize);
+ blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65439df..c3963f880448 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
ret = ubiblock_read(pdu);
rq_flush_dcache_pages(req);
- blk_mq_end_request(req, ret);
+ blk_mq_end_request(req, errno_to_blk_status(ret));
}
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
case REQ_OP_READ:
ubi_sgl_init(&pdu->usgl);
queue_work(dev->wq, &pdu->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
default:
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
}
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 822198a75e96..f12d23c49771 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
* another kernel subsystem, and we just pass it through.
*/
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
goto out;
}
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
"io error in %s sector %lld, len %d,\n",
(rw == READ) ? "READ" : "WRITE",
(unsigned long long) iter.bi_sector, len);
- bio->bi_error = err;
+ bio->bi_status = errno_to_blk_status(err);
break;
}
}
@@ -273,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
q->queuedata = nsblk;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b8fd9b..b6ba0618ea46 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
* another kernel subsystem, and we just pass it through.
*/
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
goto out;
}
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
(op_is_write(bio_op(bio))) ? "WRITE" :
"READ",
(unsigned long long) iter.bi_sector, len);
- bio->bi_error = err;
+ bio->bi_status = errno_to_blk_status(err);
break;
}
}
@@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
- blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
btt->btt_queue->queuedata = btt;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..6b577afb1d44 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
return to_nd_region(to_dev(pmem)->parent);
}
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
- unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
{
struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
- int rc = 0;
+ blk_status_t rc = BLK_STS_OK;
sector = (offset - pmem->data_offset) / 512;
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
if (cleared < len)
- rc = -EIO;
+ rc = BLK_STS_IOERR;
if (cleared > 0 && cleared / 512) {
cleared /= 512;
dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
kunmap_atomic(mem);
}
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
void *pmem_addr, unsigned int len)
{
int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
rc = memcpy_mcsafe(mem + off, pmem_addr, len);
kunmap_atomic(mem);
if (rc)
- return -EIO;
- return 0;
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
}
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, bool is_write,
sector_t sector)
{
- int rc = 0;
+ blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false;
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (!is_write) {
if (unlikely(bad_pmem))
- rc = -EIO;
+ rc = BLK_STS_IOERR;
else {
rc = read_pmem(page, off, pmem_addr, len);
flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
- int rc = 0;
+ blk_status_t rc = 0;
bool do_acct;
unsigned long start;
struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
bvec.bv_offset, op_is_write(bio_op(bio)),
iter.bi_sector);
if (rc) {
- bio->bi_error = rc;
+ bio->bi_status = rc;
break;
}
}
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
- int rc;
+ blk_status_t rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
if (rc == 0)
page_endio(page, is_write, 0);
- return rc;
+ return blk_status_to_errno(rc);
}
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
q->queuedata = pmem;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 90745a616df7..46d6cb1e03bd 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -13,18 +13,6 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
-config BLK_DEV_NVME_SCSI
- bool "SCSI emulation for NVMe device nodes"
- depends on NVME_CORE
- ---help---
- This adds support for the SG_IO ioctl on the NVMe character
- and block devices nodes, as well as a translation for a small
- number of selected SCSI commands to NVMe commands to the NVMe
- driver. If you don't know what this means you probably want
- to say N here, unless you run a distro that abuses the SCSI
- emulation to provide stable device names for mount by id, like
- some OpenSuSE and SLES versions.
-
config NVME_FABRICS
tristate
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index f1a7d945fbb6..cc0aacb4c8b4 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,7 +5,6 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
nvme-core-y := core.o
-nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 903d5813023a..d70df1d0072d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
-#include <scsi/sg.h>
#include <asm/unaligned.h>
#include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
@@ -65,34 +64,53 @@ static bool force_apst;
module_param(force_apst, bool, 0644);
MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ return -EBUSY;
+ if (!queue_work(nvme_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ ret = nvme_reset_ctrl(ctrl);
+ if (!ret)
+ flush_work(&ctrl->reset_work);
+ return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
{
switch (nvme_req(req)->status & 0x7ff) {
case NVME_SC_SUCCESS:
- return 0;
+ return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
- return -ENOSPC;
- default:
- return -EIO;
-
- /*
- * XXX: these errors are a nasty side-band protocol to
- * drivers/md/dm-mpath.c:noretry_error() that aren't documented
- * anywhere..
- */
- case NVME_SC_CMD_SEQ_ERROR:
- return -EILSEQ;
+ return BLK_STS_NOSPC;
case NVME_SC_ONCS_NOT_SUPPORTED:
- return -EOPNOTSUPP;
+ return BLK_STS_NOTSUPP;
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
- return -ENODATA;
+ return BLK_STS_MEDIUM;
+ default:
+ return BLK_STS_IOERR;
}
}
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_RECONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+
+ c.directive.opcode = nvme_admin_directive_send;
+ c.directive.nsid = cpu_to_le32(0xffffffff);
+ c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+ c.directive.dtype = NVME_DIR_IDENTIFY;
+ c.directive.tdtype = NVME_DIR_STREAMS;
+ c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+ return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+ return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+ return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+ struct streams_directive_params *s, u32 nsid)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ memset(s, 0, sizeof(*s));
+
+ c.directive.opcode = nvme_admin_directive_recv;
+ c.directive.nsid = cpu_to_le32(nsid);
+ c.directive.numd = sizeof(*s);
+ c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+ c.directive.dtype = NVME_DIR_STREAMS;
+
+ return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+ return 0;
+ if (!streams)
+ return 0;
+
+ ret = nvme_enable_streams(ctrl);
+ if (ret)
+ return ret;
+
+ ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+ if (ret)
+ return ret;
+
+ ctrl->nssa = le16_to_cpu(s.nssa);
+ if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+ dev_info(ctrl->device, "too few streams (%u) available\n",
+ ctrl->nssa);
+ nvme_disable_streams(ctrl);
+ return 0;
+ }
+
+ ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+ dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+ return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+ struct request *req, u16 *control,
+ u32 *dsmgmt)
+{
+ enum rw_hint streamid = req->write_hint;
+
+ if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+ streamid = 0;
+ else {
+ streamid--;
+ if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+ return;
+
+ *control |= NVME_RW_DTYPE_STREAMS;
+ *dsmgmt |= streamid << 16;
+ }
+
+ if (streamid < ARRAY_SIZE(req->q->write_hints))
+ req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
cmnd->common.nsid = cpu_to_le32(ns->ns_id);
}
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
if (!range)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
__rq_for_each_bio(bio, req) {
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
if (WARN_ON_ONCE(n != segments)) {
kfree(range);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_len = sizeof(*range) * segments;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd)
{
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
u32 dsmgmt = 0;
+ /*
+ * If formated with metadata, require the block layer provide a buffer
+ * unless this namespace is formated such that the metadata can be
+ * stripped/generated by the controller with PRACT=1.
+ */
+ if (ns && ns->ms &&
+ (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+ !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+ return BLK_STS_NOTSUPP;
+
if (req->cmd_flags & REQ_FUA)
control |= NVME_RW_FUA;
if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+ nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
if (ns->ms) {
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ return 0;
}
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
- int ret = BLK_MQ_RQ_QUEUE_OK;
+ blk_status_t ret = BLK_STS_OK;
if (!(req->rq_flags & RQF_DONTPREP)) {
nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
break;
case REQ_OP_READ:
case REQ_OP_WRITE:
- nvme_setup_rw(ns, req, cmd);
+ ret = nvme_setup_rw(ns, req, cmd);
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
result, timeout);
}
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
{
struct nvme_ctrl *ctrl = rq->end_io_data;
blk_mq_free_request(rq);
- if (error) {
+ if (status) {
dev_err(ctrl->device,
- "failed nvme_keep_alive_end_io error=%d\n", error);
+ "failed nvme_keep_alive_end_io error=%d\n",
+ status);
return;
}
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
if (nvme_keep_alive(ctrl)) {
/* allocation failure, reset the controller */
dev_err(ctrl->device, "keep-alive failed\n");
- ctrl->ops->reset_ctrl(ctrl);
+ nvme_reset_ctrl(ctrl);
return;
}
}
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+ struct nvme_command c = { };
+ int status;
+ void *data;
+ int pos;
+ int len;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(nsid);
+ c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+ data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+ NVME_IDENTIFY_DATA_SIZE);
+ if (status)
+ goto free_data;
+
+ for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+ struct nvme_ns_id_desc *cur = data + pos;
+
+ if (cur->nidl == 0)
+ break;
+
+ switch (cur->nidt) {
+ case NVME_NIDT_EUI64:
+ if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_EUI64_LEN;
+ memcpy(ns->eui, data + pos + sizeof(*cur), len);
+ break;
+ case NVME_NIDT_NGUID:
+ if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_NGUID_LEN;
+ memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+ break;
+ case NVME_NIDT_UUID:
+ if (cur->nidl != NVME_NIDT_UUID_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_UUID_LEN;
+ uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+ break;
+ default:
+ /* Skip unnkown types */
+ len = cur->nidl;
+ break;
+ }
+
+ len += sizeof(*cur);
+ }
+free_data:
+ kfree(data);
+ return status;
+}
+
static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id)
{
struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
return error;
}
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result)
-{
- struct nvme_command c;
- union nvme_result res;
- int ret;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_get_features;
- c.features.nsid = cpu_to_le32(nsid);
- c.features.fid = cpu_to_le32(fid);
-
- ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
- NVME_QID_ANY, 0, 0);
- if (ret >= 0 && result)
- *result = le32_to_cpu(res.u32);
- return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
void *buffer, size_t buflen, u32 *result)
{
struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
return ret;
}
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
-
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
-}
-
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
* access to the admin queue, as that might be only way to fix them up.
*/
if (status > 0) {
- dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+ dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
*count = 0;
} else {
nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
- case SG_GET_VERSION_NUM:
- return nvme_sg_get_version_num((void __user *)arg);
- case SG_IO:
- return nvme_sg_io(ns, (void __user *)arg);
-#endif
default:
#ifdef CONFIG_NVM
if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- switch (cmd) {
- case SG_IO:
- return -ENOIOCTLCMD;
- }
return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
@@ -983,6 +1135,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+ u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+ blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
static void nvme_config_discard(struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
+ if (ctrl->nr_streams && ns->sws && ns->sgs) {
+ unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+ ns->queue->limits.discard_alignment = sz;
+ ns->queue->limits.discard_granularity = sz;
+ } else {
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ }
blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1016,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
- memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+ memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+ if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+ /* Don't treat error as fatal we potentially
+ * already have a NGUID or EUI-64
+ */
+ if (nvme_identify_ns_descs(ns, ns->ns_id))
+ dev_warn(ns->ctrl->device,
+ "%s: Identify Descriptors failed\n", __func__);
+ }
return 0;
}
@@ -1024,6 +1197,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 bs;
/*
@@ -1034,12 +1208,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
bs = 1 << ns->lba_shift;
+ ns->noiob = le16_to_cpu(id->noiob);
blk_mq_freeze_queue(disk->queue);
- if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
nvme_prep_integrity(disk, id, bs);
blk_queue_logical_block_size(ns->queue, bs);
+ if (ns->noiob)
+ nvme_set_chunk_size(ns);
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
- if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
@@ -1283,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
- unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+ unsigned long timeout = jiffies + (shutdown_timeout * HZ);
u32 csts;
int ret;
@@ -1372,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
if (!table)
return;
- if (ctrl->ps_max_latency_us == 0) {
+ if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
apste = 0;
dev_dbg(ctrl->device, "APST disabled\n");
@@ -1528,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
string_matches(id->fr, q->fr, sizeof(id->fr));
}
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+ size_t nqnlen;
+ int off;
+
+ nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+ if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+ strcpy(ctrl->subnqn, id->subnqn);
+ return;
+ }
+
+ if (ctrl->vs >= NVME_VS(1, 2, 1))
+ dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+ /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+ off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+ "nqn.2014.08.org.nvmexpress:%4x%4x",
+ le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+ memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+ off += sizeof(id->sn);
+ memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+ off += sizeof(id->mn);
+ memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
@@ -1539,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
- u8 prev_apsta;
+ bool prev_apst_enabled;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
@@ -1563,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return -EIO;
}
+ nvme_init_subnqn(ctrl, id);
+
if (!ctrl->identified) {
/*
* Check for quirks. Quirk can depend on firmware version,
@@ -1582,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
- dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+ dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
}
@@ -1607,16 +1811,17 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->kas = le16_to_cpu(id->kas);
ctrl->npss = id->npss;
- prev_apsta = ctrl->apsta;
+ ctrl->apsta = id->apsta;
+ prev_apst_enabled = ctrl->apst_enabled;
if (ctrl->quirks & NVME_QUIRK_NO_APST) {
if (force_apst && id->apsta) {
- dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
- ctrl->apsta = 1;
+ dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+ ctrl->apst_enabled = true;
} else {
- ctrl->apsta = 0;
+ ctrl->apst_enabled = false;
}
} else {
- ctrl->apsta = id->apsta;
+ ctrl->apst_enabled = id->apsta;
}
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
@@ -1634,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = -EINVAL;
if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"keep-alive support is mandatory for fabrics\n");
ret = -EINVAL;
}
} else {
ctrl->cntlid = le16_to_cpu(id->cntlid);
+ ctrl->hmpre = le32_to_cpu(id->hmpre);
+ ctrl->hmmin = le32_to_cpu(id->hmmin);
}
kfree(id);
- if (ctrl->apsta && !prev_apsta)
+ if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
- else if (!ctrl->apsta && prev_apsta)
+ else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
nvme_configure_apst(ctrl);
+ nvme_configure_directives(ctrl);
ctrl->identified = true;
@@ -1735,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
dev_warn(ctrl->device, "resetting controller\n");
- return ctrl->ops->reset_ctrl(ctrl);
+ return nvme_reset_ctrl_sync(ctrl);
case NVME_IOCTL_SUBSYS_RESET:
return nvme_reset_subsystem(ctrl);
case NVME_IOCTL_RESCAN:
@@ -1761,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
int ret;
- ret = ctrl->ops->reset_ctrl(ctrl);
+ ret = nvme_reset_ctrl_sync(ctrl);
if (ret < 0)
return ret;
return count;
@@ -1787,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
int serial_len = sizeof(ctrl->serial);
int model_len = sizeof(ctrl->model);
- if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
- return sprintf(buf, "eui.%16phN\n", ns->uuid);
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return sprintf(buf, "eui.%16phN\n", ns->nguid);
if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1803,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+ return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- return sprintf(buf, "%pU\n", ns->uuid);
+
+ /* For backward compatibility expose the NGUID to userspace if
+ * we have no UUID set
+ */
+ if (uuid_is_null(&ns->uuid)) {
+ printk_ratelimited(KERN_WARNING
+ "No UUID available providing old NGUID\n");
+ return sprintf(buf, "%pU\n", ns->nguid);
+ }
+ return sprintf(buf, "%pU\n", &ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
@@ -1830,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
+ &dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
NULL,
@@ -1842,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
if (a == &dev_attr_uuid.attr) {
- if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+ if (uuid_is_null(&ns->uuid) ||
+ !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return 0;
+ }
+ if (a == &dev_attr_nguid.attr) {
+ if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
return 0;
}
if (a == &dev_attr_eui.attr) {
@@ -1931,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- return snprintf(buf, PAGE_SIZE, "%s\n",
- ctrl->ops->get_subsysnqn(ctrl));
+ return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
}
static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
@@ -1961,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
NULL
};
-#define CHECK_ATTR(ctrl, a, name) \
- if ((a) == &dev_attr_##name.attr && \
- !(ctrl)->ops->get_##name) \
- return 0
-
static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- if (a == &dev_attr_delete_controller.attr) {
- if (!ctrl->ops->delete_ctrl)
- return 0;
- }
-
- CHECK_ATTR(ctrl, a, subsysnqn);
- CHECK_ATTR(ctrl, a, address);
+ if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+ return 0;
+ if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+ return 0;
return a->mode;
}
@@ -2019,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return ret;
}
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!ctrl->nr_streams)
+ return 0;
+
+ ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+ if (ret)
+ return ret;
+
+ ns->sws = le32_to_cpu(s.sws);
+ ns->sgs = le16_to_cpu(s.sgs);
+
+ if (ns->sws) {
+ unsigned int bs = 1 << ns->lba_shift;
+
+ blk_queue_io_min(ns->queue, bs * ns->sws);
+ if (ns->sgs)
+ blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+ }
+
+ return 0;
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -2048,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
+ nvme_setup_streams_ns(ctrl, ns);
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
@@ -2056,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
- dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+ dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
goto out_free_id;
}
@@ -2231,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
* removal.
*/
if (ctrl->state == NVME_CTRL_LIVE)
- schedule_work(&ctrl->scan_work);
+ queue_work(nvme_wq, &ctrl->scan_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_scan);
@@ -2286,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
@@ -2309,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
void nvme_queue_async_events(struct nvme_ctrl *ctrl)
{
ctrl->event_limit = NVME_NR_AERS;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_async_events);
@@ -2442,6 +2691,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+
/* Forcibly start all queues to avoid having stuck requests */
blk_mq_start_hw_queues(ctrl->admin_q);
@@ -2455,6 +2707,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ns->queue);
+
/*
* Forcibly start all queues to avoid having stuck requests.
* Note that we must ensure the queues are not stopped
@@ -2533,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_unquiesce_queue(ns->queue);
blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
@@ -2544,10 +2799,15 @@ int __init nvme_core_init(void)
{
int result;
+ nvme_wq = alloc_workqueue("nvme-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_wq)
+ return -ENOMEM;
+
result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
&nvme_dev_fops);
if (result < 0)
- return result;
+ goto destroy_wq;
else if (result > 0)
nvme_char_major = result;
@@ -2559,8 +2819,10 @@ int __init nvme_core_init(void)
return 0;
- unregister_chrdev:
+unregister_chrdev:
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+ destroy_workqueue(nvme_wq);
return result;
}
@@ -2568,6 +2830,7 @@ void nvme_core_exit(void)
{
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ destroy_workqueue(nvme_wq);
}
MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c190d7e36900..2e582a240943 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
kref_init(&host->ref);
memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
- uuid_gen(&host->id);
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
return NULL;
kref_init(&host->ref);
- uuid_gen(&host->id);
snprintf(host->nqn, NVMF_NQN_SIZE,
"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
EXPORT_SYMBOL_GPL(nvmf_get_address);
/**
- * nvmf_get_subsysnqn() - Get subsystem NQN
- * @ctrl: Host NVMe controller instance which we got the NQN
- */
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
-{
- return ctrl->opts->subsysnqn;
-}
-EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
-
-/**
* nvmf_reg_read32() - NVMe Fabrics "Property Get" API function.
* @ctrl: Host NVMe controller instance maintaining the admin
* queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
}
}
break;
+
+ case NVME_SC_CONNECT_INVALID_HOST:
+ dev_err(ctrl->device,
+ "Connect for subsystem %s is not allowed, hostnqn: %s\n",
+ data->subsysnqn, data->hostnqn);
+ break;
+
+ case NVME_SC_CONNECT_CTRL_BUSY:
+ dev_err(ctrl->device,
+ "Connect command failed: controller is busy or not available\n");
+ break;
+
+ case NVME_SC_CONNECT_FORMAT:
+ dev_err(ctrl->device,
+ "Connect incompatible format: %d",
+ cmd->connect.recfmt);
+ break;
+
default:
dev_err(ctrl->device,
"Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
cmd.connect.opcode = nvme_fabrics_command;
cmd.connect.fctype = nvme_fabrics_type_connect;
cmd.connect.qid = 0;
-
- /*
- * fabrics spec sets a minimum of depth 32 for admin queue,
- * so set the queue with this depth always until
- * justification otherwise.
- */
- cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+ cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
/*
* Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
{
if (ctrl->opts->max_reconnects != -1 &&
- ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+ ctrl->nr_reconnects < ctrl->opts->max_reconnects)
return true;
return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_KATO, "keep_alive_tmo=%d" },
{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
{ NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
+ { NVMF_OPT_HOST_ID, "hostid=%s" },
{ NVMF_OPT_ERR, NULL }
};
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
int token, ret = 0;
size_t nqnlen = 0;
int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+ uuid_t hostid;
/* Set defaults */
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
if (!options)
return -ENOMEM;
+ uuid_gen(&hostid);
+
while ((p = strsep(&o, ",\n")) != NULL) {
if (!*p)
continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->host_traddr = p;
break;
+ case NVMF_OPT_HOST_ID:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (uuid_parse(p, &hostid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->host = nvmf_default_host;
}
+ uuid_copy(&opts->host->id, &hostid);
+
out:
if (!opts->discovery_nqn && !opts->kato)
opts->kato = NVME_DEFAULT_KATO;
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
- NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
+ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+ NVMF_OPT_HOST_ID)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_unlock;
}
+ if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
+ dev_warn(ctrl->device,
+ "controller returned incorrect NQN: \"%s\".\n",
+ ctrl->subnqn);
+ mutex_unlock(&nvmf_transports_mutex);
+ ctrl->ops->delete_ctrl(ctrl);
+ return ERR_PTR(-EINVAL);
+ }
+
mutex_unlock(&nvmf_transports_mutex);
return ctrl;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 29be7600689d..bf33663218cd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -56,6 +56,7 @@ enum {
NVMF_OPT_RECONNECT_DELAY = 1 << 9,
NVMF_OPT_HOST_TRADDR = 1 << 10,
NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
+ NVMF_OPT_HOST_ID = 1 << 12,
};
/**
@@ -80,7 +81,6 @@ enum {
* @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
* @kato: Keep-alive timeout.
* @host: Virtual NVMe host, contains the NQN and Host ID.
- * @nr_reconnects: number of reconnect attempted since the last ctrl failure
* @max_reconnects: maximum number of allowed reconnect attempts before removing
* the controller, (-1) means reconnect forever, zero means remove
* immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
bool discovery_nqn;
unsigned int kato;
struct nvmf_host *host;
- int nr_reconnects;
int max_reconnects;
};
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5ee4c71d168d..ed87214fdc0e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
*/
#define NVME_FC_NR_AEN_COMMANDS 1
#define NVME_FC_AQ_BLKMQ_DEPTH \
- (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1)
enum nvme_fc_queue_flags {
@@ -161,12 +161,12 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
- struct work_struct reset_work;
struct delayed_work connect_work;
struct kref ref;
u32 flags;
u32 iocnt;
+ wait_queue_head_t ioabort_wait;
struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS];
@@ -214,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);
-static struct workqueue_struct *nvme_fc_wq;
@@ -1241,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
spin_lock_irqsave(&ctrl->lock, flags);
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
+ if (ctrl->flags & FCCTRL_TERMIO) {
+ if (!--ctrl->iocnt)
+ wake_up(&ctrl->ioabort_wait);
+ }
}
if (op->flags & FCOP_FLAGS_RELEASED)
complete_rq = true;
@@ -1449,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
{
struct nvme_fc_ctrl *ctrl = set->driver_data;
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1];
-
- return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
-}
-
-static int
-nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct nvme_fc_ctrl *ctrl = set->driver_data;
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_queue *queue = &ctrl->queues[0];
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+ struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
}
@@ -1758,16 +1749,16 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
+ /* only proceed if in LIVE state - e.g. on first error */
+ if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+ return;
+
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport association error detected: %s\n",
ctrl->cnum, errmsg);
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
- /* stop the queues on error, cleanup is in reset thread */
- if (ctrl->queue_count > 1)
- nvme_stop_queues(&ctrl->ctrl);
-
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
@@ -1775,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
return;
}
- if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
- dev_err(ctrl->ctrl.device,
- "NVME-FC{%d}: error_recovery: Failed to schedule "
- "reset work\n", ctrl->cnum);
+ nvme_reset_ctrl(&ctrl->ctrl);
}
static enum blk_eh_timer_return
@@ -1887,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
* level FC exchange resource that is also outstanding. This must be
* considered in all cleanup operations.
*/
-static int
+static blk_status_t
nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
struct nvme_fc_fcp_op *op, u32 data_len,
enum nvmefc_fcp_datadir io_dir)
@@ -1902,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
* the target device is present
*/
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
if (!nvme_fc_ctrl_get(ctrl))
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
/* format the FC-NVME CMD IU and fcp_req */
cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1953,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (ret < 0) {
nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (ret == -ENOMEM || ret == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
}
@@ -1971,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
queue->lldd_handle, &op->fcp_req);
if (ret) {
- if (op->rq) { /* normal request */
+ if (op->rq) /* normal request */
nvme_fc_unmap_data(ctrl, op->rq, op);
- nvme_cleanup_cmd(op->rq);
- }
/* else - aen. no cleanup needed */
nvme_fc_ctrl_put(ctrl);
if (ret != -EBUSY)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
if (op->rq) {
blk_mq_stop_hw_queues(op->rq->q);
blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
}
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static int
+static blk_status_t
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -2005,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command *sqe = &cmdiu->sqe;
enum nvmefc_fcp_datadir io_dir;
u32 data_len;
- int ret;
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, rq, sqe);
if (ret)
@@ -2060,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
struct nvme_fc_fcp_op *aen_op;
unsigned long flags;
bool terminating = false;
- int ret;
+ blk_status_t ret;
if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
return;
@@ -2092,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
FCOP_FLAGS_COMPLETE);
- nvme_cleanup_cmd(rq);
nvme_fc_unmap_data(ctrl, rq, op);
nvme_complete_rq(rq);
nvme_fc_ctrl_put(ctrl);
@@ -2310,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
int ret;
bool changed;
- ++ctrl->ctrl.opts->nr_reconnects;
+ ++ctrl->ctrl.nr_reconnects;
/*
* Create the admin queue
@@ -2407,7 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- ctrl->ctrl.opts->nr_reconnects = 0;
+ ctrl->ctrl.nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
@@ -2493,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
/* wait for all io that had to be aborted */
spin_lock_irqsave(&ctrl->lock, flags);
- while (ctrl->iocnt) {
- spin_unlock_irqrestore(&ctrl->lock, flags);
- msleep(1000);
- spin_lock_irqsave(&ctrl->lock, flags);
- }
+ wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
ctrl->flags &= ~FCCTRL_TERMIO;
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -2527,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, delete_work);
- cancel_work_sync(&ctrl->reset_work);
+ cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/*
@@ -2554,7 +2536,7 @@ __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return true;
- if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return true;
return false;
@@ -2581,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
ret = __nvme_fc_del_ctrl(ctrl);
if (!ret)
- flush_workqueue(nvme_fc_wq);
+ flush_workqueue(nvme_wq);
nvme_put_ctrl(&ctrl->ctrl);
@@ -2606,13 +2588,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+ queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
- ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
+ ctrl->cnum, ctrl->ctrl.nr_reconnects);
WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
}
}
@@ -2621,7 +2603,7 @@ static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
struct nvme_fc_ctrl *ctrl =
- container_of(work, struct nvme_fc_ctrl, reset_work);
+ container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
int ret;
/* will block will waiting for io to terminate */
@@ -2635,29 +2617,6 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
}
-/*
- * called by the nvme core layer, for sysfs interface that requests
- * a reset of the nvme controller
- */
-static int
-nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-
- dev_info(ctrl->ctrl.device,
- "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
-}
-
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.name = "fc",
.module = THIS_MODULE,
@@ -2665,11 +2624,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_fc_reset_nvme_ctrl,
.free_ctrl = nvme_fc_nvme_ctrl_freed,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_del_nvme_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
.get_address = nvmf_get_address,
};
@@ -2695,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
.queue_rq = nvme_fc_queue_rq,
.complete = nvme_fc_complete_rq,
- .init_request = nvme_fc_init_admin_request,
+ .init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
.reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_admin_hctx,
@@ -2740,7 +2697,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
kref_init(&ctrl->ref);
INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
spin_lock_init(&ctrl->lock);
@@ -2807,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
+ /* Remove core ctrl ref. */
+ nvme_put_ctrl(&ctrl->ctrl);
+
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
* the calling routine, thinking it's prior to the
@@ -2965,20 +2925,7 @@ static struct nvmf_transport_ops nvme_fc_transport = {
static int __init nvme_fc_init_module(void)
{
- int ret;
-
- nvme_fc_wq = create_workqueue("nvme_fc_wq");
- if (!nvme_fc_wq)
- return -ENOMEM;
-
- ret = nvmf_register_transport(&nvme_fc_transport);
- if (ret)
- goto err;
-
- return 0;
-err:
- destroy_workqueue(nvme_fc_wq);
- return ret;
+ return nvmf_register_transport(&nvme_fc_transport);
}
static void __exit nvme_fc_exit_module(void)
@@ -2989,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
nvmf_unregister_transport(&nvme_fc_transport);
- destroy_workqueue(nvme_fc_wq);
-
ida_destroy(&nvme_fc_local_port_cnt);
ida_destroy(&nvme_fc_ctrl_cnt);
}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f5df78ed1e10..be8541335e31 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
}
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
rqd->bio->bi_iter.bi_sector));
}
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
struct nvm_rq *rqd = rq->end_io_data;
@@ -509,7 +509,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
if (IS_ERR(rq)) {
kfree(cmd);
- return -ENOMEM;
+ return PTR_ERR(rq);
}
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
@@ -571,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.max_phys_sect = 64,
};
-static void nvme_nvm_end_user_vio(struct request *rq, int error)
-{
- struct completion *waiting = rq->end_io_data;
-
- complete(waiting);
-}
-
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
struct nvme_ns *ns,
struct nvme_nvm_command *vcmd,
@@ -608,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
- rq->end_io_data = &wait;
if (ppa_buf && ppa_len) {
ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -662,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
}
submit:
- blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
-
- wait_for_completion_io(&wait);
+ blk_execute_rq(q, NULL, rq, 0);
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
ret = -EINTR;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9d6a070d4391..d70ff0fdd36b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
extern unsigned char admin_timeout;
#define ADMIN_TIMEOUT (admin_timeout * HZ)
-extern unsigned char shutdown_timeout;
-#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
-
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
+extern struct workqueue_struct *nvme_wq;
+
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
struct device *device; /* char device */
struct list_head node;
struct ida ns_ida;
+ struct work_struct reset_work;
struct opal_dev *opal_dev;
@@ -138,6 +138,7 @@ struct nvme_ctrl {
char serial[20];
char model[40];
char firmware_rev[8];
+ char subnqn[NVMF_NQN_SIZE];
u16 cntlid;
u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
u16 oncs;
u16 vid;
u16 oacs;
+ u16 nssa;
+ u16 nr_streams;
atomic_t abort_limit;
u8 event_limit;
u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
/* Power saving configuration */
u64 ps_max_latency_us;
+ bool apst_enabled;
+
+ u32 hmpre;
+ u32 hmmin;
/* Fabrics only */
u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
u32 iorcsz;
u16 icdoff;
u16 maxcmd;
+ int nr_reconnects;
struct nvmf_ctrl_options *opts;
};
-/*
- * An NVM Express namespace is equivalent to a SCSI LUN
- */
struct nvme_ns {
struct list_head list;
@@ -189,14 +194,18 @@ struct nvme_ns {
int instance;
u8 eui[8];
- u8 uuid[16];
+ u8 nguid[16];
+ uuid_t uuid;
unsigned ns_id;
int lba_shift;
u16 ms;
+ u16 sgs;
+ u32 sws;
bool ext;
u8 pi_type;
unsigned long flags;
+ u16 noiob;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
@@ -214,11 +223,9 @@ struct nvme_ctrl_ops {
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
- int (*reset_ctrl)(struct nvme_ctrl *ctrl);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
int (*delete_ctrl)(struct nvme_ctrl *ctrl);
- const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
@@ -296,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
@@ -310,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
void __user *ubuffer, unsigned bufflen,
void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
- struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result);
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
- void *buffer, size_t buflen, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
-
-struct sg_io_hdr;
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
-int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
-int nvme_sg_get_version_num(int __user *ip);
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 40c7581caeb0..33c3b9db7d36 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,28 +17,15 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
#include <linux/dmi.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h>
@@ -49,7 +36,6 @@
#include "nvme.h"
#define NVME_Q_DEPTH 1024
-#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+ "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
struct nvme_dev;
struct nvme_queue;
-static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
@@ -92,9 +80,8 @@ struct nvme_dev {
int q_depth;
u32 db_stride;
void __iomem *bar;
- struct work_struct reset_work;
+ unsigned long bar_mapped_size;
struct work_struct remove_work;
- struct timer_list watchdog_timer;
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
u32 cmbloc;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
+
+ /* shadow doorbell buffer support: */
u32 *dbbuf_dbs;
dma_addr_t dbbuf_dbs_dma_addr;
u32 *dbbuf_eis;
dma_addr_t dbbuf_eis_dma_addr;
+
+ /* host memory buffer support: */
+ u64 host_mem_size;
+ u32 nr_host_mem_descs;
+ struct nvme_host_mem_buf_desc *host_mem_descs;
+ void **host_mem_desc_bufs;
};
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
- BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
nvmeq->tags = NULL;
}
-static int nvme_admin_init_request(struct blk_mq_tag_set *set,
- struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- struct nvme_dev *dev = set->driver_data;
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[0];
-
- BUG_ON(!nvmeq);
- iod->nvmeq = nvmeq;
- return 0;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
{
struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+ int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+ struct nvme_queue *nvmeq = dev->queues[queue_idx];
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
if (!iod->sg)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
} else {
iod->sg = iod->inline_sg;
}
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
iod->nents = 0;
iod->length = size;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
return true;
}
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct request_queue *q = req->q;
enum dma_data_direction dma_dir = rq_data_dir(req) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
- int ret = BLK_MQ_RQ_QUEUE_ERROR;
+ blk_status_t ret = BLK_STS_IOERR;
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(q, req, iod->sg);
if (!iod->nents)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
DMA_ATTR_NO_WARN))
goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
if (!nvme_setup_prps(dev, req))
goto out_unmap;
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
if (blk_integrity_rq(req)) {
if (blk_rq_count_integrity_sg(q, req->bio) != 1)
goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
if (blk_integrity_rq(req))
cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_unmap:
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
/*
* NOTE: ns is NULL when called on the admin queue.
*/
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_command cmnd;
- int ret = BLK_MQ_RQ_QUEUE_OK;
-
- /*
- * If formated with metadata, require the block layer provide a buffer
- * unless this namespace is formated such that the metadata can be
- * stripped/generated by the controller with PRACT=1.
- */
- if (ns && ns->ms && !blk_integrity_rq(req)) {
- if (!(ns->pi_type && ns->ms == 8) &&
- !blk_rq_is_passthrough(req)) {
- blk_mq_end_request(req, -EFAULT);
- return BLK_MQ_RQ_QUEUE_OK;
- }
- }
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &cmnd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
ret = nvme_init_iod(req, dev);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
goto out_free_cmd;
- if (blk_rq_nr_phys_segments(req))
+ if (blk_rq_nr_phys_segments(req)) {
ret = nvme_map_data(dev, req, &cmnd);
-
- if (ret != BLK_MQ_RQ_QUEUE_OK)
- goto out_cleanup_iod;
+ if (ret)
+ goto out_cleanup_iod;
+ }
blk_mq_start_request(req);
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
__nvme_submit_cmd(nvmeq, &cmnd);
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
out_free_cmd:
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
}
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
- u16 head, phase;
-
- head = nvmeq->cq_head;
- phase = nvmeq->cq_phase;
-
- while (nvme_cqe_valid(nvmeq, head, phase)) {
- struct nvme_completion cqe = nvmeq->cqes[head];
- struct request *req;
-
- if (++head == nvmeq->q_depth) {
- head = 0;
- phase = !phase;
- }
-
- if (tag && *tag == cqe.command_id)
- *tag = -1;
+ u16 head = nvmeq->cq_head;
- if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
- dev_warn(nvmeq->dev->ctrl.device,
- "invalid id %d completed on queue %d\n",
- cqe.command_id, le16_to_cpu(cqe.sq_id));
- continue;
- }
+ if (likely(nvmeq->cq_vector >= 0)) {
+ if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+ nvmeq->dbbuf_cq_ei))
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+ }
+}
- /*
- * AEN requests are special as they don't time out and can
- * survive any kind of queue freeze and often don't respond to
- * aborts. We don't even bother to allocate a struct request
- * for them but rather special case them here.
- */
- if (unlikely(nvmeq->qid == 0 &&
- cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
- nvme_complete_async_event(&nvmeq->dev->ctrl,
- cqe.status, &cqe.result);
- continue;
- }
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ struct request *req;
- req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
- nvme_end_request(req, cqe.status, cqe.result);
+ if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->dev->ctrl.device,
+ "invalid id %d completed on queue %d\n",
+ cqe->command_id, le16_to_cpu(cqe->sq_id));
+ return;
}
- if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvmeq->qid == 0 &&
+ cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+ nvme_complete_async_event(&nvmeq->dev->ctrl,
+ cqe->status, &cqe->result);
return;
+ }
- if (likely(nvmeq->cq_vector >= 0))
- if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
- nvmeq->dbbuf_cq_ei))
- writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
- nvmeq->cq_head = head;
- nvmeq->cq_phase = phase;
+ req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+ nvme_end_request(req, cqe->status, cqe->result);
+}
- nvmeq->cqe_seen = 1;
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+ *cqe = nvmeq->cqes[nvmeq->cq_head];
+
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
+ }
+ return true;
+ }
+ return false;
}
static void nvme_process_cq(struct nvme_queue *nvmeq)
{
- __nvme_process_cq(nvmeq, NULL);
+ struct nvme_completion cqe;
+ int consumed = 0;
+
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+ }
+
+ if (consumed) {
+ nvme_ring_cq_doorbell(nvmeq);
+ nvmeq->cqe_seen = 1;
+ }
}
static irqreturn_t nvme_irq(int irq, void *data)
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
- spin_lock_irq(&nvmeq->q_lock);
- __nvme_process_cq(nvmeq, &tag);
- spin_unlock_irq(&nvmeq->q_lock);
+ struct nvme_completion cqe;
+ int found = 0, consumed = 0;
- if (tag == -1)
- return 1;
- }
+ if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ return 0;
- return 0;
+ spin_lock_irq(&nvmeq->q_lock);
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+
+ if (tag == cqe.command_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (consumed)
+ nvme_ring_cq_doorbell(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
+
+ return found;
}
static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
blk_mq_free_request(req);
}
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+ /* If true, indicates loss of adapter communication, possibly by a
+ * NVMe Subsystem reset.
+ */
+ bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+ /* If there is a reset ongoing, we shouldn't reset again. */
+ if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ return false;
+
+ /* We shouldn't reset unless the controller is on fatal error state
+ * _or_ if we lost the communication with it.
+ */
+ if (!(csts & NVME_CSTS_CFS) && !nssro)
+ return false;
+
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return false;
+
+ return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+ /* Read a config register to help see what died. */
+ u16 pci_status;
+ int result;
+
+ result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+ &pci_status);
+ if (result == PCIBIOS_SUCCESSFUL)
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+ csts, pci_status);
+ else
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+ csts, result);
+}
+
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd;
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Reset immediately if the controller is failed
+ */
+ if (nvme_should_reset(dev, csts)) {
+ nvme_warn_reset(dev, csts);
+ nvme_dev_disable(dev, false);
+ nvme_reset_ctrl(&dev->ctrl);
+ return BLK_EH_HANDLED;
+ }
/*
* Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
/*
* Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
- .init_request = nvme_admin_init_request,
+ .init_request = nvme_init_request,
.timeout = nvme_timeout,
};
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return 0;
}
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+ return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (size <= dev->bar_mapped_size)
+ return 0;
+ if (size > pci_resource_len(pdev, 0))
+ return -ENOMEM;
+ if (dev->bar)
+ iounmap(dev->bar);
+ dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+ if (!dev->bar) {
+ dev->bar_mapped_size = 0;
+ return -ENOMEM;
+ }
+ dev->bar_mapped_size = size;
+ dev->dbs = dev->bar + NVME_REG_DBS;
+
+ return 0;
+}
+
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
+ result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+ if (result < 0)
+ return result;
+
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
NVME_CAP_NSSRC(cap) : 0;
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
- /* If true, indicates loss of adapter communication, possibly by a
- * NVMe Subsystem reset.
- */
- bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
- return false;
-
- /* We shouldn't reset unless the controller is on fatal error state
- * _or_ if we lost the communication with it.
- */
- if (!(csts & NVME_CSTS_CFS) && !nssro)
- return false;
-
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
- return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
- /* Read a config register to help see what died. */
- u16 pci_status;
- int result;
-
- result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
- &pci_status);
- if (result == PCIBIOS_SUCCESSFUL)
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
- csts, pci_status);
- else
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
- csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
- struct nvme_dev *dev = (struct nvme_dev *)data;
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
- /* Skip controllers under certain specific conditions. */
- if (nvme_should_reset(dev, csts)) {
- if (!nvme_reset(dev))
- nvme_warn_reset(dev, csts);
- return;
- }
-
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i, max;
@@ -1514,16 +1532,168 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
}
}
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+{
+ size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+ struct nvme_command c;
+ u64 dma_addr;
+ int ret;
+
+ dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev->dev, dma_addr))
+ return -ENOMEM;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+ c.features.dword11 = cpu_to_le32(bits);
+ c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
+ ilog2(dev->ctrl.page_size));
+ c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
+ c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
+ c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
+
+ ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+ if (ret) {
+ dev_warn(dev->ctrl.device,
+ "failed to set host mem (err %d, flags %#x).\n",
+ ret, bits);
+ }
+ dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+ return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->nr_host_mem_descs; i++) {
+ struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+ size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+ le64_to_cpu(desc->addr));
+ }
+
+ kfree(dev->host_mem_desc_bufs);
+ dev->host_mem_desc_bufs = NULL;
+ kfree(dev->host_mem_descs);
+ dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
- return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+ struct nvme_host_mem_buf_desc *descs;
+ u32 chunk_size, max_entries, i = 0;
+ void **bufs;
+ u64 size, tmp;
+
+ /* start big and work our way down */
+ chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+ tmp = (preferred + chunk_size - 1);
+ do_div(tmp, chunk_size);
+ max_entries = tmp;
+ descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+ if (!descs)
+ goto out;
+
+ bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+ if (!bufs)
+ goto out_free_descs;
+
+ for (size = 0; size < preferred; size += chunk_size) {
+ u32 len = min_t(u64, chunk_size, preferred - size);
+ dma_addr_t dma_addr;
+
+ bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+ if (!bufs[i])
+ break;
+
+ descs[i].addr = cpu_to_le64(dma_addr);
+ descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+ i++;
+ }
+
+ if (!size || (min && size < min)) {
+ dev_warn(dev->ctrl.device,
+ "failed to allocate host memory buffer.\n");
+ goto out_free_bufs;
+ }
+
+ dev_info(dev->ctrl.device,
+ "allocated %lld MiB host memory buffer.\n",
+ size >> ilog2(SZ_1M));
+ dev->nr_host_mem_descs = i;
+ dev->host_mem_size = size;
+ dev->host_mem_descs = descs;
+ dev->host_mem_desc_bufs = bufs;
+ return 0;
+
+out_free_bufs:
+ while (--i >= 0) {
+ size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, bufs[i],
+ le64_to_cpu(descs[i].addr));
+ }
+
+ kfree(bufs);
+out_free_descs:
+ kfree(descs);
+out:
+ /* try a smaller chunk size if we failed early */
+ if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+ chunk_size /= 2;
+ goto retry;
+ }
+ dev->host_mem_descs = NULL;
+ return -ENOMEM;
+}
+
+static void nvme_setup_host_mem(struct nvme_dev *dev)
+{
+ u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+ u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+ u64 min = (u64)dev->ctrl.hmmin * 4096;
+ u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+ preferred = min(preferred, max);
+ if (min > max) {
+ dev_warn(dev->ctrl.device,
+ "min host memory (%lld MiB) above limit (%d MiB).\n",
+ min >> ilog2(SZ_1M), max_host_mem_size_mb);
+ nvme_free_host_mem(dev);
+ return;
+ }
+
+ /*
+ * If we already have a buffer allocated check if we can reuse it.
+ */
+ if (dev->host_mem_descs) {
+ if (dev->host_mem_size >= min)
+ enable_bits |= NVME_HOST_MEM_RETURN;
+ else
+ nvme_free_host_mem(dev);
+ }
+
+ if (!dev->host_mem_descs) {
+ if (nvme_alloc_host_mem(dev, min, preferred))
+ return;
+ }
+
+ if (nvme_set_host_mem(dev, enable_bits))
+ nvme_free_host_mem(dev);
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues, size;
+ int result, nr_io_queues;
+ unsigned long size;
nr_io_queues = num_online_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1542,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nvme_release_cmb(dev);
}
- size = db_bar_size(dev, nr_io_queues);
- if (size > 8192) {
- iounmap(dev->bar);
- do {
- dev->bar = ioremap(pci_resource_start(pdev, 0), size);
- if (dev->bar)
- break;
- if (!--nr_io_queues)
- return -ENOMEM;
- size = db_bar_size(dev, nr_io_queues);
- } while (1);
- dev->dbs = dev->bar + 4096;
- adminq->q_db = dev->dbs;
- }
+ do {
+ size = db_bar_size(dev, nr_io_queues);
+ result = nvme_remap_bar(dev, size);
+ if (!result)
+ break;
+ if (!--nr_io_queues)
+ return -ENOMEM;
+ } while (1);
+ adminq->q_db = dev->dbs;
/* Deregister the admin queue's interrupt */
pci_free_irq(pdev, 0, adminq);
@@ -1586,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return nvme_create_io_queues(dev);
}
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
@@ -1594,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
complete(&nvmeq->dev->ioq_wait);
}
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
@@ -1799,8 +1964,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- del_timer_sync(&dev->watchdog_timer);
-
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
@@ -1816,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* Give the controller a chance to complete all entered requests if
* doing a safe shutdown.
*/
- if (!dead && shutdown)
- nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ if (!dead) {
+ if (shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+ /*
+ * If the controller is still alive tell it to stop using the
+ * host memory buffer. In theory the shutdown / reset should
+ * make sure that it doesn't access the host memoery anymore,
+ * but I'd rather be safe than sorry..
+ */
+ if (dev->host_mem_descs)
+ nvme_set_host_mem(dev, 0);
+
+ }
nvme_stop_queues(&dev->ctrl);
queues = dev->online_queues - 1;
@@ -1900,7 +2075,8 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
static void nvme_reset_work(struct work_struct *work)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ struct nvme_dev *dev =
+ container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
@@ -1949,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
"unable to allocate dma for dbbuf\n");
}
+ if (dev->ctrl.hmpre)
+ nvme_setup_host_mem(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@@ -1962,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->online_queues > 1)
nvme_queue_async_events(&dev->ctrl);
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
@@ -2003,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
nvme_put_ctrl(&dev->ctrl);
}
-static int nvme_reset(struct nvme_dev *dev)
-{
- if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
- return -ENODEV;
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
- if (!queue_work(nvme_workq, &dev->reset_work))
- return -EBUSY;
- return 0;
-}
-
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
*val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -2032,16 +2198,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
return 0;
}
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
- struct nvme_dev *dev = to_nvme_dev(ctrl);
- int ret = nvme_reset(dev);
-
- if (!ret)
- flush_work(&dev->reset_work);
- return ret;
-}
-
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
@@ -2049,7 +2205,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
- .reset_ctrl = nvme_pci_reset_ctrl,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
};
@@ -2061,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_mem_regions(pdev, "nvme"))
return -ENODEV;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar)
+ if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
goto release;
return 0;
@@ -2116,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto free;
- INIT_WORK(&dev->reset_work, nvme_reset_work);
+ INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
- setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
- (unsigned long)dev);
mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait);
@@ -2137,7 +2289,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- queue_work(nvme_workq, &dev->reset_work);
+ queue_work(nvme_wq, &dev->ctrl.reset_work);
return 0;
release_pools:
@@ -2158,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
if (prepare)
nvme_dev_disable(dev, false);
else
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
}
static void nvme_shutdown(struct pci_dev *pdev)
@@ -2178,7 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
- cancel_work_sync(&dev->reset_work);
+ cancel_work_sync(&dev->ctrl.reset_work);
pci_set_drvdata(pdev, NULL);
if (!pci_device_is_present(pdev)) {
@@ -2186,9 +2338,10 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_dev_disable(dev, false);
}
- flush_work(&dev->reset_work);
+ flush_work(&dev->ctrl.reset_work);
nvme_uninit_ctrl(&dev->ctrl);
nvme_dev_disable(dev, true);
+ nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
nvme_release_prp_pools(dev);
@@ -2229,7 +2382,7 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset(ndev);
+ nvme_reset_ctrl(&ndev->ctrl);
return 0;
}
#endif
@@ -2268,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
@@ -2324,22 +2477,12 @@ static struct pci_driver nvme_driver = {
static int __init nvme_init(void)
{
- int result;
-
- nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
- if (!nvme_workq)
- return -ENOMEM;
-
- result = pci_register_driver(&nvme_driver);
- if (result)
- destroy_workqueue(nvme_workq);
- return result;
+ return pci_register_driver(&nvme_driver);
}
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- destroy_workqueue(nvme_workq);
_nvme_check_size();
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 24397d306d53..6d4119dfbdaa 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
*/
#define NVME_RDMA_NR_AEN_COMMANDS 1
#define NVME_RDMA_AQ_BLKMQ_DEPTH \
- (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
struct nvme_rdma_device {
struct ib_device *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
};
enum nvme_rdma_queue_flags {
- NVME_RDMA_Q_CONNECTED = (1 << 0),
- NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
- NVME_RDMA_Q_DELETING = (1 << 2),
- NVME_RDMA_Q_LIVE = (1 << 3),
+ NVME_RDMA_Q_LIVE = 0,
+ NVME_RDMA_Q_DELETING = 1,
};
struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
};
struct nvme_rdma_ctrl {
- /* read and written in the hot path */
- spinlock_t lock;
-
/* read only in the hot path */
struct nvme_rdma_queue *queues;
u32 queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
/* other member variables */
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
- struct work_struct reset_work;
struct work_struct err_work;
struct nvme_rdma_qe async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(nvme_rdma_ctrl_list);
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
-static struct workqueue_struct *nvme_rdma_wq;
-
/*
* Disabling this option makes small I/O goes faster, but is fundamentally
* unsafe. With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ out:
return ret;
}
-static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
- struct request *rq, unsigned int queue_idx)
+static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx)
{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
DMA_TO_DEVICE);
}
-static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx)
-{
- return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx)
-{
- return __nvme_rdma_exit_request(set->driver_data, rq, 0);
-}
-
-static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
- struct request *rq, unsigned int queue_idx)
+static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx,
+ unsigned int numa_node)
{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ out_free_qe:
return -ENOMEM;
}
-static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return __nvme_rdma_init_request(set->driver_data, rq, 0);
-}
-
static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
struct nvme_rdma_device *dev;
struct ib_device *ibdev;
- if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
- return;
-
dev = queue->device;
ibdev = dev->dev;
rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
-static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
- struct nvme_rdma_device *dev)
+static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
- struct ib_device *ibdev = dev->dev;
+ struct ib_device *ibdev;
const int send_wr_factor = 3; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1; /* + RECV */
int comp_vector, idx = nvme_rdma_queue_idx(queue);
-
int ret;
- queue->device = dev;
+ queue->device = nvme_rdma_find_get_device(queue->cm_id);
+ if (!queue->device) {
+ dev_err(queue->cm_id->device->dev.parent,
+ "no client data found!\n");
+ return -ECONNREFUSED;
+ }
+ ibdev = queue->device->dev;
/*
* The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
/* +1 for ib_stop_cq */
- queue->ib_cq = ib_alloc_cq(dev->dev, queue,
- cq_factor * queue->queue_size + 1, comp_vector,
- IB_POLL_SOFTIRQ);
+ queue->ib_cq = ib_alloc_cq(ibdev, queue,
+ cq_factor * queue->queue_size + 1,
+ comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(queue->ib_cq)) {
ret = PTR_ERR(queue->ib_cq);
- goto out;
+ goto out_put_dev;
}
ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
ret = -ENOMEM;
goto out_destroy_qp;
}
- set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
return 0;
@@ -532,7 +503,8 @@ out_destroy_qp:
ib_destroy_qp(queue->qp);
out_destroy_ib_cq:
ib_free_cq(queue->ib_cq);
-out:
+out_put_dev:
+ nvme_rdma_dev_put(queue->device);
return ret;
}
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
}
clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
- set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
return 0;
out_destroy_cm_id:
- nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
return ret;
}
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+ queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
- queue_work(nvme_rdma_wq, &ctrl->delete_work);
+ queue_work(nvme_wq, &ctrl->delete_work);
}
}
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
bool changed;
int ret;
- ++ctrl->ctrl.opts->nr_reconnects;
+ ++ctrl->ctrl.nr_reconnects;
if (ctrl->queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
@@ -749,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ret)
goto requeue;
- ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+ ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (ret)
goto requeue;
@@ -777,7 +747,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- ctrl->ctrl.opts->nr_reconnects = 0;
+ ctrl->ctrl.nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_queue_scan(&ctrl->ctrl);
@@ -790,7 +760,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
- ctrl->ctrl.opts->nr_reconnects);
+ ctrl->ctrl.nr_reconnects);
nvme_rdma_reconnect_or_remove(ctrl);
}
@@ -802,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_stop_keep_alive(&ctrl->ctrl);
- for (i = 0; i < ctrl->queue_count; i++) {
- clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+ for (i = 0; i < ctrl->queue_count; i++)
clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
- }
if (ctrl->queue_count > 1)
nvme_stop_queues(&ctrl->ctrl);
@@ -833,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
return;
- queue_work(nvme_rdma_wq, &ctrl->err_work);
+ queue_work(nvme_wq, &ctrl->err_work);
}
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1278,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
{
- struct nvme_rdma_device *dev;
int ret;
- dev = nvme_rdma_find_get_device(queue->cm_id);
- if (!dev) {
- dev_err(queue->cm_id->device->dev.parent,
- "no client data found!\n");
- return -ECONNREFUSED;
- }
-
- ret = nvme_rdma_create_queue_ib(queue, dev);
- if (ret) {
- nvme_rdma_dev_put(dev);
- goto out;
- }
+ ret = nvme_rdma_create_queue_ib(queue);
+ if (ret)
+ return ret;
ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
@@ -1306,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
out_destroy_queue:
nvme_rdma_destroy_queue_ib(queue);
-out:
return ret;
}
@@ -1334,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
* specified by the Fabrics standard.
*/
if (priv.qid == 0) {
- priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
- priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+ priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+ priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
} else {
/*
* current interpretation of the fabrics spec
@@ -1383,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
complete(&queue->cm_done);
return 0;
case RDMA_CM_EVENT_REJECTED:
+ nvme_rdma_destroy_queue_ib(queue);
cm_error = nvme_rdma_conn_rejected(queue, ev);
break;
- case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
+ nvme_rdma_destroy_queue_ib(queue);
+ case RDMA_CM_EVENT_ADDR_ERROR:
dev_dbg(queue->ctrl->ctrl.device,
"CM error event %d\n", ev->event);
cm_error = -ECONNRESET;
@@ -1435,8 +1394,8 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
/*
* We cannot accept any other command until the Connect command has completed.
*/
-static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
- struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
struct nvme_command *cmd = nvme_req(rq)->cmd;
@@ -1452,16 +1411,15 @@ static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
* failover.
*/
if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
- return -EIO;
- else
- return -EAGAIN;
+ return BLK_STS_IOERR;
+ return BLK_STS_RESOURCE; /* try again later */
}
}
return 0;
}
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1472,28 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
- int ret;
+ blk_status_t ret;
+ int err;
WARN_ON_ONCE(rq->tag < 0);
ret = nvme_rdma_queue_is_ready(queue, rq);
if (unlikely(ret))
- goto err;
+ return ret;
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ret = nvme_setup_cmd(ns, rq, c);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
blk_mq_start_request(rq);
- ret = nvme_rdma_map_data(queue, rq, c);
- if (ret < 0) {
+ err = nvme_rdma_map_data(queue, rq, c);
+ if (err < 0) {
dev_err(queue->ctrl->ctrl.device,
- "Failed to map data (%d)\n", ret);
+ "Failed to map data (%d)\n", err);
nvme_cleanup_cmd(rq);
goto err;
}
@@ -1503,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (req_op(rq) == REQ_OP_FLUSH)
flush = true;
- ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+ err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
- if (ret) {
+ if (err) {
nvme_rdma_unmap_data(queue, rq);
goto err;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
err:
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (err == -ENOMEM || err == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1523,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
struct ib_wc wc;
int found = 0;
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
struct ib_cqe *cqe = wc.wr_cqe;
@@ -1560,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.queue_rq = nvme_rdma_queue_rq,
.complete = nvme_rdma_complete_rq,
- .init_request = nvme_rdma_init_admin_request,
- .exit_request = nvme_rdma_exit_admin_request,
+ .init_request = nvme_rdma_init_request,
+ .exit_request = nvme_rdma_exit_request,
.reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_admin_hctx,
.timeout = nvme_rdma_timeout,
@@ -1571,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
{
int error;
- error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+ error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (error)
return error;
@@ -1672,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_io_queues(ctrl);
}
- if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+ if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1709,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
@@ -1743,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
- struct nvme_rdma_ctrl *ctrl = container_of(work,
- struct nvme_rdma_ctrl, reset_work);
+ struct nvme_rdma_ctrl *ctrl =
+ container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
int ret;
bool changed;
@@ -1785,22 +1744,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
del_dead_ctrl:
/* Deleting this dead controller... */
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
+ WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1810,11 +1754,9 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_rdma_reset_ctrl,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_del_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
.get_address = nvmf_get_address,
};
@@ -1919,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
nvme_rdma_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
- spin_lock_init(&ctrl->lock);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1939,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
/* sanity check icdoff */
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+ ret = -EINVAL;
goto out_remove_admin_queue;
}
/* sanity check keyed sgls */
if (!(ctrl->ctrl.sgls & (1 << 20))) {
dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
+ ret = -EINVAL;
goto out_remove_admin_queue;
}
@@ -2033,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
- flush_workqueue(nvme_rdma_wq);
+ flush_workqueue(nvme_wq);
}
static struct ib_client nvme_rdma_ib_client = {
@@ -2046,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
{
int ret;
- nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
- if (!nvme_rdma_wq)
- return -ENOMEM;
-
ret = ib_register_client(&nvme_rdma_ib_client);
if (ret)
- goto err_destroy_wq;
+ return ret;
ret = nvmf_register_transport(&nvme_rdma_transport);
if (ret)
@@ -2062,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
err_unreg_client:
ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
- destroy_workqueue(nvme_rdma_wq);
return ret;
}
@@ -2071,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
{
nvmf_unregister_transport(&nvme_rdma_transport);
ib_unregister_client(&nvme_rdma_ib_client);
- destroy_workqueue(nvme_rdma_wq);
}
module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644
index 1f7671e631dd..000000000000
--- a/drivers/nvme/host/scsi.c
+++ /dev/null
@@ -1,2460 +0,0 @@
-/*
- * NVM Express device driver
- * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-/*
- * Refer to the SCSI-NVMe Translation spec for details on how
- * each command is translated.
- */
-
-#include <linux/bio.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/unaligned.h>
-#include <scsi/sg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_request.h>
-
-#include "nvme.h"
-
-static int sg_version_num = 30534; /* 2 digits for each component */
-
-/* VPD Page Codes */
-#define VPD_SUPPORTED_PAGES 0x00
-#define VPD_SERIAL_NUMBER 0x80
-#define VPD_DEVICE_IDENTIFIERS 0x83
-#define VPD_EXTENDED_INQUIRY 0x86
-#define VPD_BLOCK_LIMITS 0xB0
-#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
-
-/* format unit paramter list offsets */
-#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
-#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
-#define FORMAT_UNIT_PROT_INT_OFFSET 3
-#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
-#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
-
-/* Misc. defines */
-#define FIXED_SENSE_DATA 0x70
-#define DESC_FORMAT_SENSE_DATA 0x72
-#define FIXED_SENSE_DATA_ADD_LENGTH 10
-#define LUN_ENTRY_SIZE 8
-#define LUN_DATA_HEADER_SIZE 8
-#define ALL_LUNS_RETURNED 0x02
-#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01
-#define RESTRICTED_LUNS_RETURNED 0x00
-#define DOWNLOAD_SAVE_ACTIVATE 0x05
-#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E
-#define ACTIVATE_DEFERRED_MICROCODE 0x0F
-#define FORMAT_UNIT_IMMED_MASK 0x2
-#define FORMAT_UNIT_IMMED_OFFSET 1
-#define KELVIN_TEMP_FACTOR 273
-#define FIXED_FMT_SENSE_DATA_SIZE 18
-#define DESC_FMT_SENSE_DATA_SIZE 8
-
-/* SCSI/NVMe defines and bit masks */
-#define INQ_STANDARD_INQUIRY_PAGE 0x00
-#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00
-#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80
-#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83
-#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86
-#define INQ_BDEV_LIMITS_PAGE 0xB0
-#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1
-#define INQ_SERIAL_NUMBER_LENGTH 0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES 6
-#define VERSION_SPC_4 0x06
-#define ACA_UNSUPPORTED 0
-#define STANDARD_INQUIRY_LENGTH 36
-#define ADDITIONAL_STD_INQ_LENGTH 31
-#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
-#define RESERVED_FIELD 0
-
-/* Mode Sense/Select defines */
-#define MODE_PAGE_INFO_EXCEP 0x1C
-#define MODE_PAGE_CACHING 0x08
-#define MODE_PAGE_CONTROL 0x0A
-#define MODE_PAGE_POWER_CONDITION 0x1A
-#define MODE_PAGE_RETURN_ALL 0x3F
-#define MODE_PAGE_BLK_DES_LEN 0x08
-#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10
-#define MODE_PAGE_CACHING_LEN 0x14
-#define MODE_PAGE_CONTROL_LEN 0x0C
-#define MODE_PAGE_POW_CND_LEN 0x28
-#define MODE_PAGE_INF_EXC_LEN 0x0C
-#define MODE_PAGE_ALL_LEN 0x54
-#define MODE_SENSE6_MPH_SIZE 4
-#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
-#define MODE_SENSE_PAGE_CODE_OFFSET 2
-#define MODE_SENSE_PAGE_CODE_MASK 0x3F
-#define MODE_SENSE_LLBAA_MASK 0x10
-#define MODE_SENSE_LLBAA_SHIFT 4
-#define MODE_SENSE_DBD_MASK 8
-#define MODE_SENSE_DBD_SHIFT 3
-#define MODE_SENSE10_MPH_SIZE 8
-#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
-#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
-#define MODE_SELECT_6_BD_OFFSET 3
-#define MODE_SELECT_10_BD_OFFSET 6
-#define MODE_SELECT_10_LLBAA_OFFSET 4
-#define MODE_SELECT_10_LLBAA_MASK 1
-#define MODE_SELECT_6_MPH_SIZE 4
-#define MODE_SELECT_10_MPH_SIZE 8
-#define CACHING_MODE_PAGE_WCE_MASK 0x04
-#define MODE_SENSE_BLK_DESC_ENABLED 0
-#define MODE_SENSE_BLK_DESC_COUNT 1
-#define MODE_SELECT_PAGE_CODE_MASK 0x3F
-#define SHORT_DESC_BLOCK 8
-#define LONG_DESC_BLOCK 16
-#define MODE_PAGE_POW_CND_LEN_FIELD 0x26
-#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A
-#define MODE_PAGE_CACHING_LEN_FIELD 0x12
-#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A
-#define MODE_SENSE_PC_CURRENT_VALUES 0
-
-/* Log Sense defines */
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
-#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
-#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
-#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
-#define LOG_SENSE_CDB_PC_MASK 0xC0
-#define LOG_SENSE_CDB_PC_SHIFT 6
-#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
-#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
-#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
-#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
-#define REMAINING_TEMP_PAGE_LENGTH 0xC
-#define LOG_TEMP_PAGE_LENGTH 0x10
-#define LOG_TEMP_UNKNOWN 0xFF
-#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3
-
-/* Read Capacity defines */
-#define READ_CAP_10_RESP_SIZE 8
-#define READ_CAP_16_RESP_SIZE 32
-
-/* NVMe Namespace and Command Defines */
-#define BYTES_TO_DWORDS 4
-#define NVME_MAX_FIRMWARE_SLOT 7
-
-/* Report LUNs defines */
-#define REPORT_LUNS_FIRST_LUN_OFFSET 8
-
-/* SCSI ADDITIONAL SENSE Codes */
-
-#define SCSI_ASC_NO_SENSE 0x00
-#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03
-#define SCSI_ASC_LUN_NOT_READY 0x04
-#define SCSI_ASC_WARNING 0x0B
-#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10
-#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10
-#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10
-#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11
-#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D
-#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20
-#define SCSI_ASC_ILLEGAL_COMMAND 0x20
-#define SCSI_ASC_ILLEGAL_BLOCK 0x21
-#define SCSI_ASC_INVALID_CDB 0x24
-#define SCSI_ASC_INVALID_LUN 0x25
-#define SCSI_ASC_INVALID_PARAMETER 0x26
-#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31
-#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44
-
-/* SCSI ADDITIONAL SENSE Code Qualifiers */
-
-#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00
-#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01
-#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01
-#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02
-#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03
-#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04
-#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
-#define SCSI_ASCQ_INVALID_LUN_ID 0x09
-
-/* copied from drivers/usb/gadget/function/storage_common.h */
-static inline u32 get_unaligned_be24(u8 *buf)
-{
- return 0xffffff & (u32) get_unaligned_be32(buf - 1);
-}
-
-/* Struct to gather data that needs to be extracted from a SCSI CDB.
- Not conforming to any particular CDB variant, but compatible with all. */
-
-struct nvme_trans_io_cdb {
- u8 fua;
- u8 prot_info;
- u64 lba;
- u32 xfer_len;
-};
-
-
-/* Internal Helper Functions */
-
-
-/* Copy data to userspace memory */
-
-static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
- unsigned long n)
-{
- int i;
- void *index = from;
- size_t remaining = n;
- size_t xfer_len;
-
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- for (i = 0; i < hdr->iovec_count; i++) {
- if (copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec)))
- return -EFAULT;
- xfer_len = min(remaining, sgl.iov_len);
- if (copy_to_user(sgl.iov_base, index, xfer_len))
- return -EFAULT;
-
- index += xfer_len;
- remaining -= xfer_len;
- if (remaining == 0)
- break;
- }
- return 0;
- }
-
- if (copy_to_user(hdr->dxferp, from, n))
- return -EFAULT;
- return 0;
-}
-
-/* Copy data from userspace memory */
-
-static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
- unsigned long n)
-{
- int i;
- void *index = to;
- size_t remaining = n;
- size_t xfer_len;
-
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- for (i = 0; i < hdr->iovec_count; i++) {
- if (copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec)))
- return -EFAULT;
- xfer_len = min(remaining, sgl.iov_len);
- if (copy_from_user(index, sgl.iov_base, xfer_len))
- return -EFAULT;
- index += xfer_len;
- remaining -= xfer_len;
- if (remaining == 0)
- break;
- }
- return 0;
- }
-
- if (copy_from_user(to, hdr->dxferp, n))
- return -EFAULT;
- return 0;
-}
-
-/* Status/Sense Buffer Writeback */
-
-static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
- u8 asc, u8 ascq)
-{
- u8 xfer_len;
- u8 resp[DESC_FMT_SENSE_DATA_SIZE];
-
- if (scsi_status_is_good(status)) {
- hdr->status = SAM_STAT_GOOD;
- hdr->masked_status = GOOD;
- hdr->host_status = DID_OK;
- hdr->driver_status = DRIVER_OK;
- hdr->sb_len_wr = 0;
- } else {
- hdr->status = status;
- hdr->masked_status = status >> 1;
- hdr->host_status = DID_OK;
- hdr->driver_status = DRIVER_OK;
-
- memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
- resp[0] = DESC_FORMAT_SENSE_DATA;
- resp[1] = sense_key;
- resp[2] = asc;
- resp[3] = ascq;
-
- xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
- hdr->sb_len_wr = xfer_len;
- if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/*
- * Take a status code from a lowlevel routine, and if it was a positive NVMe
- * error code update the sense data based on it. In either case the passed
- * in value is returned again, unless an -EFAULT from copy_to_user overrides
- * it.
- */
-static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
-{
- u8 status, sense_key, asc, ascq;
- int res;
-
- /* For non-nvme (Linux) errors, simply return the error code */
- if (nvme_sc < 0)
- return nvme_sc;
-
- /* Mask DNR, More, and reserved fields */
- switch (nvme_sc & 0x7FF) {
- /* Generic Command Status */
- case NVME_SC_SUCCESS:
- status = SAM_STAT_GOOD;
- sense_key = NO_SENSE;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_OPCODE:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ILLEGAL_COMMAND;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_FIELD:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_INVALID_CDB;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_DATA_XFER_ERROR:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_POWER_LOSS:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_WARNING;
- ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
- break;
- case NVME_SC_INTERNAL:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = HARDWARE_ERROR;
- asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ABORT_REQ:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ABORT_QUEUE:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_FUSED_FAIL:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_FUSED_MISSING:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_NS:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
- ascq = SCSI_ASCQ_INVALID_LUN_ID;
- break;
- case NVME_SC_LBA_RANGE:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ILLEGAL_BLOCK;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_CAP_EXCEEDED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_NS_NOT_READY:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = NOT_READY;
- asc = SCSI_ASC_LUN_NOT_READY;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
-
- /* Command Specific Status */
- case NVME_SC_INVALID_FORMAT:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
- ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
- break;
- case NVME_SC_BAD_ATTRIBUTES:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_INVALID_CDB;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
-
- /* Media Errors */
- case NVME_SC_WRITE_FAULT:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_READ_ERROR:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_GUARD_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
- break;
- case NVME_SC_APPTAG_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
- break;
- case NVME_SC_REFTAG_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
- break;
- case NVME_SC_COMPARE_FAILED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MISCOMPARE;
- asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ACCESS_DENIED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
- ascq = SCSI_ASCQ_INVALID_LUN_ID;
- break;
-
- /* Unspecified/Default */
- case NVME_SC_CMDID_CONFLICT:
- case NVME_SC_CMD_SEQ_ERROR:
- case NVME_SC_CQ_INVALID:
- case NVME_SC_QID_INVALID:
- case NVME_SC_QUEUE_SIZE:
- case NVME_SC_ABORT_LIMIT:
- case NVME_SC_ABORT_MISSING:
- case NVME_SC_ASYNC_LIMIT:
- case NVME_SC_FIRMWARE_SLOT:
- case NVME_SC_FIRMWARE_IMAGE:
- case NVME_SC_INVALID_VECTOR:
- case NVME_SC_INVALID_LOG_PAGE:
- default:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- }
-
- res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
- return res ? res : nvme_sc;
-}
-
-/* INQUIRY Helper Functions */
-
-static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ns *id_ns;
- int res;
- int nvme_sc;
- int xfer_len;
- u8 resp_data_format = 0x02;
- u8 protect;
- u8 cmdque = 0x01 << 1;
- u8 fw_offset = sizeof(ctrl->firmware_rev);
-
- /* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- if (id_ns->dps)
- protect = 0x01;
- else
- protect = 0;
- kfree(id_ns);
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[2] = VERSION_SPC_4;
- inq_response[3] = resp_data_format; /*normaca=0 | hisup=0 */
- inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
- inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
- inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
- strncpy(&inq_response[8], "NVMe ", 8);
- strncpy(&inq_response[16], ctrl->model, 16);
-
- while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
- fw_offset--;
- fw_offset -= 4;
- strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- int xfer_len;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE; /* Page Code */
- inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES; /* Page Length */
- inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
- inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
- inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
- inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
- inq_response[9] = INQ_BDEV_LIMITS_PAGE;
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- int xfer_len;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
- inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
- strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
-{
- struct nvme_id_ns *id_ns;
- int nvme_sc, res;
- size_t len;
- void *eui;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- eui = id_ns->eui64;
- len = sizeof(id_ns->eui64);
-
- if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
- if (bitmap_empty(eui, len * 8)) {
- eui = id_ns->nguid;
- len = sizeof(id_ns->nguid);
- }
- }
-
- if (bitmap_empty(eui, len * 8)) {
- res = -EOPNOTSUPP;
- goto out_free_id;
- }
-
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[3] = 4 + len; /* Page Length */
-
- /* Designation Descriptor start */
- inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
- inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = len; /* Designator Length */
- memcpy(&inq_response[8], eui, len);
-
- res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-out_free_id:
- kfree(id_ns);
- return res;
-}
-
-static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ctrl *id_ctrl;
- int nvme_sc, res;
-
- if (alloc_len < 72) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[3] = 0x48; /* Page Length */
-
- /* Designation Descriptor start */
- inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
- inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = 0x44; /* Designator Length */
-
- sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
- memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
- sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
- memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
-
- res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
- kfree(id_ctrl);
- return res;
-}
-
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int alloc_len)
-{
- int res;
-
- if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
- res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
- if (res != -EOPNOTSUPP)
- return res;
- }
-
- return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
-}
-
-static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- u8 *inq_response;
- int res;
- int nvme_sc;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ctrl *id_ctrl;
- struct nvme_id_ns *id_ns;
- int xfer_len;
- u8 microcode = 0x80;
- u8 spt;
- u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
- u8 grd_chk, app_chk, ref_chk, protect;
- u8 uask_sup = 0x20;
- u8 v_sup;
- u8 luiclr = 0x01;
-
- inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL)
- return -ENOMEM;
-
- nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free_inq;
-
- spt = spt_lut[id_ns->dpc & 0x07] << 3;
- if (id_ns->dps)
- protect = 0x01;
- else
- protect = 0;
- kfree(id_ns);
-
- grd_chk = protect << 2;
- app_chk = protect << 1;
- ref_chk = protect;
-
- nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free_inq;
-
- v_sup = id_ctrl->vwc;
- kfree(id_ctrl);
-
- memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
- inq_response[2] = 0x00; /* Page Length MSB */
- inq_response[3] = 0x3C; /* Page Length LSB */
- inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
- inq_response[5] = uask_sup;
- inq_response[6] = v_sup;
- inq_response[7] = luiclr;
- inq_response[8] = 0;
- inq_response[9] = 0;
-
- xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free_inq:
- kfree(inq_response);
- return res;
-}
-
-static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
-{
- __be32 max_sectors = cpu_to_be32(
- nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
- __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
- __be32 discard_desc_count = cpu_to_be32(0x100);
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = VPD_BLOCK_LIMITS;
- inq_response[3] = 0x3c; /* Page Length */
- memcpy(&inq_response[8], &max_sectors, sizeof(u32));
- memcpy(&inq_response[20], &max_discard, sizeof(u32));
-
- if (max_discard)
- memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
-
- return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
-}
-
-static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- u8 *inq_response;
- int res;
- int xfer_len;
-
- inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
- inq_response[2] = 0x00; /* Page Length MSB */
- inq_response[3] = 0x3C; /* Page Length LSB */
- inq_response[4] = 0x00; /* Medium Rotation Rate MSB */
- inq_response[5] = 0x01; /* Medium Rotation Rate LSB */
- inq_response[6] = 0x00; /* Form Factor */
-
- xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- kfree(inq_response);
- out_mem:
- return res;
-}
-
-/* LOG SENSE Helper Functions */
-
-static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
-
- log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
- log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
- log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
- log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
-
- xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- kfree(log_response);
- out_mem:
- return res;
-}
-
-static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
- struct nvme_smart_log *smart_log;
- u8 temp_c;
- u16 temp_k;
-
- log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL)
- return -ENOMEM;
-
- res = nvme_get_log_page(ns->ctrl, &smart_log);
- if (res < 0)
- goto out_free_response;
-
- if (res != NVME_SC_SUCCESS) {
- temp_c = LOG_TEMP_UNKNOWN;
- } else {
- temp_k = (smart_log->temperature[1] << 8) +
- (smart_log->temperature[0]);
- temp_c = temp_k - KELVIN_TEMP_FACTOR;
- }
- kfree(smart_log);
-
- log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
- /* Informational Exceptions Log Parameter 1 Start */
- /* Parameter Code=0x0000 bytes 4,5 */
- log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
- log_response[7] = 0x04; /* PARAMETER LENGTH */
- /* Add sense Code and qualifier = 0x00 each */
- /* Use Temperature from NVMe Get Log Page, convert to C from K */
- log_response[10] = temp_c;
-
- xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
- kfree(log_response);
- return res;
-}
-
-static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
- struct nvme_smart_log *smart_log;
- u32 feature_resp;
- u8 temp_c_cur, temp_c_thresh;
- u16 temp_k;
-
- log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL)
- return -ENOMEM;
-
- res = nvme_get_log_page(ns->ctrl, &smart_log);
- if (res < 0)
- goto out_free_response;
-
- if (res != NVME_SC_SUCCESS) {
- temp_c_cur = LOG_TEMP_UNKNOWN;
- } else {
- temp_k = (smart_log->temperature[1] << 8) +
- (smart_log->temperature[0]);
- temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
- }
- kfree(smart_log);
-
- /* Get Features for Temp Threshold */
- res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
- &feature_resp);
- if (res != NVME_SC_SUCCESS)
- temp_c_thresh = LOG_TEMP_UNKNOWN;
- else
- temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
-
- log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
- /* Temperature Log Parameter 1 (Temperature) Start */
- /* Parameter Code = 0x0000 */
- log_response[6] = 0x01; /* Format and Linking = 01b */
- log_response[7] = 0x02; /* Parameter Length */
- /* Use Temperature from NVMe Get Log Page, convert to C from K */
- log_response[9] = temp_c_cur;
- /* Temperature Log Parameter 2 (Reference Temperature) Start */
- log_response[11] = 0x01; /* Parameter Code = 0x0001 */
- log_response[12] = 0x01; /* Format and Linking = 01b */
- log_response[13] = 0x02; /* Parameter Length */
- /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
- log_response[15] = temp_c_thresh;
-
- xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
- kfree(log_response);
- return res;
-}
-
-/* MODE SENSE Helper Functions */
-
-static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
- u16 mode_data_length, u16 blk_desc_len)
-{
- /* Quick check to make sure I don't stomp on my own memory... */
- if ((cdb10 && len < 8) || (!cdb10 && len < 4))
- return -EINVAL;
-
- if (cdb10) {
- resp[0] = (mode_data_length & 0xFF00) >> 8;
- resp[1] = (mode_data_length & 0x00FF);
- resp[3] = 0x10 /* DPOFUA */;
- resp[4] = llbaa;
- resp[5] = RESERVED_FIELD;
- resp[6] = (blk_desc_len & 0xFF00) >> 8;
- resp[7] = (blk_desc_len & 0x00FF);
- } else {
- resp[0] = (mode_data_length & 0x00FF);
- resp[2] = 0x10 /* DPOFUA */;
- resp[3] = (blk_desc_len & 0x00FF);
- }
-
- return 0;
-}
-
-static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int len, u8 llbaa)
-{
- int res;
- int nvme_sc;
- struct nvme_id_ns *id_ns;
- u8 flbas;
- u32 lba_length;
-
- if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
- return -EINVAL;
- else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
- return -EINVAL;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- flbas = (id_ns->flbas) & 0x0F;
- lba_length = (1 << (id_ns->lbaf[flbas].ds));
-
- if (llbaa == 0) {
- __be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
- /* Byte 4 is reserved */
- __be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
-
- memcpy(resp, &tmp_cap, sizeof(u32));
- memcpy(&resp[4], &tmp_len, sizeof(u32));
- } else {
- __be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
- __be32 tmp_len = cpu_to_be32(lba_length);
-
- memcpy(resp, &tmp_cap, sizeof(u64));
- /* Bytes 8, 9, 10, 11 are reserved */
- memcpy(&resp[12], &tmp_len, sizeof(u32));
- }
-
- kfree(id_ns);
- return res;
-}
-
-static int nvme_trans_fill_control_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_CONTROL_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_CONTROL;
- resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
- resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1,
- * D_SENSE=1, GLTSD=1, RLEC=0 */
- resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
- /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */
- resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
- /* resp[6] and [7] are obsolete, thus zero */
- resp[8] = 0xFF; /* Busy timeout period = 0xffff */
- resp[9] = 0xFF;
- /* Bytes 10,11: Extended selftest completion time = 0x0000 */
-
- return 0;
-}
-
-static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr,
- u8 *resp, int len)
-{
- int res = 0;
- int nvme_sc;
- u32 feature_resp;
- u8 vwc;
-
- if (len < MODE_PAGE_CACHING_LEN)
- return -EINVAL;
-
- nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
- &feature_resp);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- vwc = feature_resp & 0x00000001;
-
- resp[0] = MODE_PAGE_CACHING;
- resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
- resp[2] = vwc << 2;
- return 0;
-}
-
-static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_POW_CND_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_POWER_CONDITION;
- resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
- /* All other bytes are zero */
-
- return 0;
-}
-
-static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_INF_EXC_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_INFO_EXCEP;
- resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
- resp[2] = 0x88;
- /* All other bytes are zero */
-
- return 0;
-}
-
-static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int len)
-{
- int res;
- u16 mode_pages_offset_1 = 0;
- u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
-
- mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
- mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
- mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
-
- res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
- MODE_PAGE_CACHING_LEN);
- if (res)
- return res;
- res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
- MODE_PAGE_CONTROL_LEN);
- if (res)
- return res;
- res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
- MODE_PAGE_POW_CND_LEN);
- if (res)
- return res;
- return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
- MODE_PAGE_INF_EXC_LEN);
-}
-
-static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
-{
- if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
- /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
- return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
- } else {
- return 0;
- }
-}
-
-static int nvme_trans_mode_page_create(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *cmd,
- u16 alloc_len, u8 cdb10,
- int (*mode_page_fill_func)
- (struct nvme_ns *,
- struct sg_io_hdr *hdr, u8 *, int),
- u16 mode_pages_tot_len)
-{
- int res;
- int xfer_len;
- u8 *response;
- u8 dbd, llbaa;
- u16 resp_size;
- int mph_size;
- u16 mode_pages_offset_1;
- u16 blk_desc_len, blk_desc_offset, mode_data_length;
-
- dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
- llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
- mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
-
- blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
-
- resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
- /* Refer spc4r34 Table 440 for calculation of Mode data Length field */
- mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
-
- blk_desc_offset = mph_size;
- mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
- llbaa, mode_data_length, blk_desc_len);
- if (res)
- goto out_free;
- if (blk_desc_len > 0) {
- res = nvme_trans_fill_blk_desc(ns, hdr,
- &response[blk_desc_offset],
- blk_desc_len, llbaa);
- if (res)
- goto out_free;
- }
- res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
- mode_pages_tot_len);
- if (res)
- goto out_free;
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- out_free:
- kfree(response);
- out_mem:
- return res;
-}
-
-/* Read Capacity Helper Functions */
-
-static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
- u8 cdb16)
-{
- u8 flbas;
- u32 lba_length;
- u64 rlba;
- u8 prot_en;
- u8 p_type_lut[4] = {0, 0, 1, 2};
- __be64 tmp_rlba;
- __be32 tmp_rlba_32;
- __be32 tmp_len;
-
- flbas = (id_ns->flbas) & 0x0F;
- lba_length = (1 << (id_ns->lbaf[flbas].ds));
- rlba = le64_to_cpup(&id_ns->nsze) - 1;
- (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
-
- if (!cdb16) {
- if (rlba > 0xFFFFFFFF)
- rlba = 0xFFFFFFFF;
- tmp_rlba_32 = cpu_to_be32(rlba);
- tmp_len = cpu_to_be32(lba_length);
- memcpy(response, &tmp_rlba_32, sizeof(u32));
- memcpy(&response[4], &tmp_len, sizeof(u32));
- } else {
- tmp_rlba = cpu_to_be64(rlba);
- tmp_len = cpu_to_be32(lba_length);
- memcpy(response, &tmp_rlba, sizeof(u64));
- memcpy(&response[8], &tmp_len, sizeof(u32));
- response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
- /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
- /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
- /* Bytes 16-31 - Reserved */
- }
-}
-
-/* Start Stop Unit Helper Functions */
-
-static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 buffer_id)
-{
- struct nvme_command c;
- int nvme_sc;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_activate_fw;
- c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 opcode, u32 tot_len, u32 offset,
- u8 buffer_id)
-{
- int nvme_sc;
- struct nvme_command c;
-
- if (hdr->iovec_count > 0) {
- /* Assuming SGL is not allowed for this command */
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_download_fw;
- c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
- c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-
- nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
- hdr->dxferp, tot_len, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-/* Mode Select Helper Functions */
-
-static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
- u16 *bd_len, u8 *llbaa)
-{
- if (cdb10) {
- /* 10 Byte CDB */
- *bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
- parm_list[MODE_SELECT_10_BD_OFFSET + 1];
- *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
- MODE_SELECT_10_LLBAA_MASK;
- } else {
- /* 6 Byte CDB */
- *bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
- }
-}
-
-static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
- u16 idx, u16 bd_len, u8 llbaa)
-{
- /* Store block descriptor info if a FORMAT UNIT comes later */
- /* TODO Saving 1st BD info; what to do if multiple BD received? */
- if (llbaa == 0) {
- /* Standard Block Descriptor - spc4r34 7.5.5.1 */
- ns->mode_select_num_blocks =
- (parm_list[idx + 1] << 16) +
- (parm_list[idx + 2] << 8) +
- (parm_list[idx + 3]);
-
- ns->mode_select_block_len =
- (parm_list[idx + 5] << 16) +
- (parm_list[idx + 6] << 8) +
- (parm_list[idx + 7]);
- } else {
- /* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
- ns->mode_select_num_blocks =
- (((u64)parm_list[idx + 0]) << 56) +
- (((u64)parm_list[idx + 1]) << 48) +
- (((u64)parm_list[idx + 2]) << 40) +
- (((u64)parm_list[idx + 3]) << 32) +
- (((u64)parm_list[idx + 4]) << 24) +
- (((u64)parm_list[idx + 5]) << 16) +
- (((u64)parm_list[idx + 6]) << 8) +
- ((u64)parm_list[idx + 7]);
-
- ns->mode_select_block_len =
- (parm_list[idx + 12] << 24) +
- (parm_list[idx + 13] << 16) +
- (parm_list[idx + 14] << 8) +
- (parm_list[idx + 15]);
- }
-}
-
-static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *mode_page, u8 page_code)
-{
- int res = 0;
- int nvme_sc;
- unsigned dword11;
-
- switch (page_code) {
- case MODE_PAGE_CACHING:
- dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
- nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
- dword11, NULL, 0, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- break;
- case MODE_PAGE_CONTROL:
- break;
- case MODE_PAGE_POWER_CONDITION:
- /* Verify the OS is not trying to set timers */
- if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- return res;
-}
-
-static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd, u16 parm_list_len, u8 pf,
- u8 sp, u8 cdb10)
-{
- int res;
- u8 *parm_list;
- u16 bd_len;
- u8 llbaa = 0;
- u16 index, saved_index;
- u8 page_code;
- u16 mp_size;
-
- /* Get parm list from data-in/out buffer */
- parm_list = kmalloc(parm_list_len, GFP_KERNEL);
- if (parm_list == NULL) {
- res = -ENOMEM;
- goto out;
- }
-
- res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
- if (res)
- goto out_mem;
-
- nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
- index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
-
- if (bd_len != 0) {
- /* Block Descriptors present, parse */
- nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
- index += bd_len;
- }
- saved_index = index;
-
- /* Multiple mode pages may be present; iterate through all */
- /* In 1st Iteration, don't do NVME Command, only check for CDB errors */
- do {
- page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
- mp_size = parm_list[index + 1] + 2;
- if ((page_code != MODE_PAGE_CACHING) &&
- (page_code != MODE_PAGE_CONTROL) &&
- (page_code != MODE_PAGE_POWER_CONDITION)) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
- index += mp_size;
- } while (index < parm_list_len);
-
- /* In 2nd Iteration, do the NVME Commands */
- index = saved_index;
- do {
- page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
- mp_size = parm_list[index + 1] + 2;
- res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
- page_code);
- if (res)
- break;
- index += mp_size;
- } while (index < parm_list_len);
-
- out_mem:
- kfree(parm_list);
- out:
- return res;
-}
-
-/* Format Unit Helper Functions */
-
-static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
- struct sg_io_hdr *hdr)
-{
- int res = 0;
- int nvme_sc;
- u8 flbas;
-
- /*
- * SCSI Expects a MODE SELECT would have been issued prior to
- * a FORMAT UNIT, and the block size and number would be used
- * from the block descriptor in it. If a MODE SELECT had not
- * been issued, FORMAT shall use the current values for both.
- */
-
- if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
- struct nvme_id_ns *id_ns;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- if (ns->mode_select_num_blocks == 0)
- ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
- if (ns->mode_select_block_len == 0) {
- flbas = (id_ns->flbas) & 0x0F;
- ns->mode_select_block_len =
- (1 << (id_ns->lbaf[flbas].ds));
- }
-
- kfree(id_ns);
- }
-
- return 0;
-}
-
-static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
- u8 format_prot_info, u8 *nvme_pf_code)
-{
- int res;
- u8 *parm_list;
- u8 pf_usage, pf_code;
-
- parm_list = kmalloc(len, GFP_KERNEL);
- if (parm_list == NULL) {
- res = -ENOMEM;
- goto out;
- }
- res = nvme_trans_copy_from_user(hdr, parm_list, len);
- if (res)
- goto out_mem;
-
- if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
- FORMAT_UNIT_IMMED_MASK) != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
-
- if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
- (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
- pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
- FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
- pf_code = (pf_usage << 2) | format_prot_info;
- switch (pf_code) {
- case 0:
- *nvme_pf_code = 0;
- break;
- case 2:
- *nvme_pf_code = 1;
- break;
- case 3:
- *nvme_pf_code = 2;
- break;
- case 7:
- *nvme_pf_code = 3;
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out_mem:
- kfree(parm_list);
- out:
- return res;
-}
-
-static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 prot_info)
-{
- int res;
- int nvme_sc;
- struct nvme_id_ns *id_ns;
- u8 i;
- u8 nlbaf;
- u8 selected_lbaf = 0xFF;
- u32 cdw10 = 0;
- struct nvme_command c;
-
- /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- nlbaf = id_ns->nlbaf;
-
- for (i = 0; i < nlbaf; i++) {
- if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
- selected_lbaf = i;
- break;
- }
- }
- if (selected_lbaf > 0x0F) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- cdw10 |= prot_info << 5;
- cdw10 |= selected_lbaf & 0x0F;
- memset(&c, 0, sizeof(c));
- c.format.opcode = nvme_admin_format_nvm;
- c.format.nsid = cpu_to_le32(ns->ns_id);
- c.format.cdw10 = cpu_to_le32(cdw10);
-
- nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
- res = nvme_trans_status_code(hdr, nvme_sc);
-
- kfree(id_ns);
- return res;
-}
-
-static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
- struct nvme_trans_io_cdb *cdb_info,
- u32 max_blocks)
-{
- /* If using iovecs, send one nvme command per vector */
- if (hdr->iovec_count > 0)
- return hdr->iovec_count;
- else if (cdb_info->xfer_len > max_blocks)
- return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
- else
- return 1;
-}
-
-static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
- struct nvme_trans_io_cdb *cdb_info)
-{
- u16 control = 0;
-
- /* When Protection information support is added, implement here */
-
- if (cdb_info->fua > 0)
- control |= NVME_RW_FUA;
-
- return control;
-}
-
-static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- struct nvme_trans_io_cdb *cdb_info, u8 is_write)
-{
- int nvme_sc = NVME_SC_SUCCESS;
- u32 num_cmds;
- u64 unit_len;
- u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
- u32 retcode;
- u32 i = 0;
- u64 nvme_offset = 0;
- void __user *next_mapping_addr;
- struct nvme_command c;
- u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
- u16 control;
- u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
-
- num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
-
- /*
- * This loop handles two cases.
- * First, when an SGL is used in the form of an iovec list:
- * - Use iov_base as the next mapping address for the nvme command_id
- * - Use iov_len as the data transfer length for the command.
- * Second, when we have a single buffer
- * - If larger than max_blocks, split into chunks, offset
- * each nvme command accordingly.
- */
- for (i = 0; i < num_cmds; i++) {
- memset(&c, 0, sizeof(c));
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- retcode = copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (retcode)
- return -EFAULT;
- unit_len = sgl.iov_len;
- unit_num_blocks = unit_len >> ns->lba_shift;
- next_mapping_addr = sgl.iov_base;
- } else {
- unit_num_blocks = min((u64)max_blocks,
- (cdb_info->xfer_len - nvme_offset));
- unit_len = unit_num_blocks << ns->lba_shift;
- next_mapping_addr = hdr->dxferp +
- ((1 << ns->lba_shift) * nvme_offset);
- }
-
- c.rw.opcode = opcode;
- c.rw.nsid = cpu_to_le32(ns->ns_id);
- c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
- c.rw.length = cpu_to_le16(unit_num_blocks - 1);
- control = nvme_trans_io_get_control(ns, cdb_info);
- c.rw.control = cpu_to_le16(control);
-
- if (get_capacity(ns->disk) - unit_num_blocks <
- cdb_info->lba + nvme_offset) {
- nvme_sc = NVME_SC_LBA_RANGE;
- break;
- }
- nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
- next_mapping_addr, unit_len, NULL, 0);
- if (nvme_sc)
- break;
-
- nvme_offset += unit_num_blocks;
- }
-
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-
-/* SCSI Command Translation Functions */
-
-static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
- u8 *cmd)
-{
- int res = 0;
- struct nvme_trans_io_cdb cdb_info = { 0, };
- u8 opcode = cmd[0];
- u64 xfer_bytes;
- u64 sum_iov_len = 0;
- struct sg_iovec sgl;
- int i;
- size_t not_copied;
-
- /*
- * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
- * but always in the same place for all others.
- */
- switch (opcode) {
- case WRITE_6:
- case READ_6:
- break;
- default:
- cdb_info.fua = cmd[1] & 0x8;
- cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
- if (cdb_info.prot_info && !ns->pi_type) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- }
-
- switch (opcode) {
- case WRITE_6:
- case READ_6:
- cdb_info.lba = get_unaligned_be24(&cmd[1]);
- cdb_info.xfer_len = cmd[4];
- if (cdb_info.xfer_len == 0)
- cdb_info.xfer_len = 256;
- break;
- case WRITE_10:
- case READ_10:
- cdb_info.lba = get_unaligned_be32(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
- break;
- case WRITE_12:
- case READ_12:
- cdb_info.lba = get_unaligned_be32(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
- break;
- case WRITE_16:
- case READ_16:
- cdb_info.lba = get_unaligned_be64(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
- break;
- default:
- /* Will never really reach here */
- res = -EIO;
- goto out;
- }
-
- /* Calculate total length of transfer (in bytes) */
- if (hdr->iovec_count > 0) {
- for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
- return -EFAULT;
- sum_iov_len += sgl.iov_len;
- /* IO vector sizes should be multiples of block size */
- if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- }
- } else {
- sum_iov_len = hdr->dxfer_len;
- }
-
- /* As Per sg ioctl howto, if the lengths differ, use the lower one */
- xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
-
- /* If block count and actual data buffer size dont match, error out */
- if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
- res = -EINVAL;
- goto out;
- }
-
- /* Check for 0 length transfer - it is not illegal */
- if (cdb_info.xfer_len == 0)
- goto out;
-
- /* Send NVMe IO Command(s) */
- res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
- if (res)
- goto out;
-
- out:
- return res;
-}
-
-static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u8 evpd;
- u8 page_code;
- int alloc_len;
- u8 *inq_response;
-
- evpd = cmd[1] & 0x01;
- page_code = cmd[2];
- alloc_len = get_unaligned_be16(&cmd[3]);
-
- inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
- GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- if (evpd == 0) {
- if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
- res = nvme_trans_standard_inquiry_page(ns, hdr,
- inq_response, alloc_len);
- } else {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- } else {
- switch (page_code) {
- case VPD_SUPPORTED_PAGES:
- res = nvme_trans_supported_vpd_pages(ns, hdr,
- inq_response, alloc_len);
- break;
- case VPD_SERIAL_NUMBER:
- res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_DEVICE_IDENTIFIERS:
- res = nvme_trans_device_id_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_EXTENDED_INQUIRY:
- res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
- break;
- case VPD_BLOCK_LIMITS:
- res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_BLOCK_DEV_CHARACTERISTICS:
- res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
- break;
- default:
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- }
- kfree(inq_response);
- out_mem:
- return res;
-}
-
-static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u16 alloc_len;
- u8 pc;
- u8 page_code;
-
- if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
- pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
- if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- alloc_len = get_unaligned_be16(&cmd[7]);
- switch (page_code) {
- case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
- res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
- break;
- case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
- res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
- break;
- case LOG_PAGE_TEMPERATURE_PAGE:
- res = nvme_trans_log_temperature(ns, hdr, alloc_len);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- u8 cdb10 = 0;
- u16 parm_list_len;
- u8 page_format;
- u8 save_pages;
-
- page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
- save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
- if (cmd[0] == MODE_SELECT) {
- parm_list_len = cmd[4];
- } else {
- parm_list_len = cmd[7];
- cdb10 = 1;
- }
-
- if (parm_list_len != 0) {
- /*
- * According to SPC-4 r24, a paramter list length field of 0
- * shall not be considered an error
- */
- return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
- page_format, save_pages, cdb10);
- }
-
- return 0;
-}
-
-static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u16 alloc_len;
- u8 cdb10 = 0;
-
- if (cmd[0] == MODE_SENSE) {
- alloc_len = cmd[4];
- } else {
- alloc_len = get_unaligned_be16(&cmd[7]);
- cdb10 = 1;
- }
-
- if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
- MODE_SENSE_PC_CURRENT_VALUES) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
- case MODE_PAGE_CACHING:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_caching_page,
- MODE_PAGE_CACHING_LEN);
- break;
- case MODE_PAGE_CONTROL:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_control_page,
- MODE_PAGE_CONTROL_LEN);
- break;
- case MODE_PAGE_POWER_CONDITION:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_pow_cnd_page,
- MODE_PAGE_POW_CND_LEN);
- break;
- case MODE_PAGE_INFO_EXCEP:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_inf_exc_page,
- MODE_PAGE_INF_EXC_LEN);
- break;
- case MODE_PAGE_RETURN_ALL:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_all_pages,
- MODE_PAGE_ALL_LEN);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd, u8 cdb16)
-{
- int res;
- int nvme_sc;
- u32 alloc_len;
- u32 resp_size;
- u32 xfer_len;
- struct nvme_id_ns *id_ns;
- u8 *response;
-
- if (cdb16) {
- alloc_len = get_unaligned_be32(&cmd[10]);
- resp_size = READ_CAP_16_RESP_SIZE;
- } else {
- alloc_len = READ_CAP_10_RESP_SIZE;
- resp_size = READ_CAP_10_RESP_SIZE;
- }
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_free_id;
- }
- nvme_trans_fill_read_cap(response, id_ns, cdb16);
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out_free_id:
- kfree(id_ns);
- return res;
-}
-
-static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- int nvme_sc;
- u32 alloc_len, xfer_len, resp_size;
- u8 *response;
- struct nvme_id_ctrl *id_ctrl;
- u32 ll_length, lun_id;
- u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
- __be32 tmp_len;
-
- switch (cmd[2]) {
- default:
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- case ALL_LUNS_RETURNED:
- case ALL_WELL_KNOWN_LUNS_RETURNED:
- case RESTRICTED_LUNS_RETURNED:
- nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
- resp_size = ll_length + LUN_DATA_HEADER_SIZE;
-
- alloc_len = get_unaligned_be32(&cmd[6]);
- if (alloc_len < resp_size) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_free_id;
- }
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_free_id;
- }
-
- /* The first LUN ID will always be 0 per the SAM spec */
- for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
- /*
- * Set the LUN Id and then increment to the next LUN
- * location in the parameter data.
- */
- __be64 tmp_id = cpu_to_be64(lun_id);
- memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
- lun_id_offset += LUN_ENTRY_SIZE;
- }
- tmp_len = cpu_to_be32(ll_length);
- memcpy(response, &tmp_len, sizeof(u32));
- }
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out_free_id:
- kfree(id_ctrl);
- return res;
-}
-
-static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u8 alloc_len, xfer_len, resp_size;
- u8 desc_format;
- u8 *response;
-
- desc_format = cmd[1] & 0x01;
- alloc_len = cmd[4];
-
- resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
- (FIXED_FMT_SENSE_DATA_SIZE));
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out;
- }
-
- if (desc_format) {
- /* Descriptor Format Sense Data */
- response[0] = DESC_FORMAT_SENSE_DATA;
- response[1] = NO_SENSE;
- /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
- response[2] = SCSI_ASC_NO_SENSE;
- response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- /* SDAT_OVFL = 0 | Additional Sense Length = 0 */
- } else {
- /* Fixed Format Sense Data */
- response[0] = FIXED_SENSE_DATA;
- /* Byte 1 = Obsolete */
- response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
- /* Bytes 3-6 - Information - set to zero */
- response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
- /* Bytes 8-11 - Cmd Specific Information - set to zero */
- response[12] = SCSI_ASC_NO_SENSE;
- response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- /* Byte 14 = Field Replaceable Unit Code = 0 */
- /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
- }
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out:
- return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
- struct sg_io_hdr *hdr)
-{
- int nvme_sc;
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u8 parm_hdr_len = 0;
- u8 nvme_pf_code = 0;
- u8 format_prot_info, long_list, format_data;
-
- format_prot_info = (cmd[1] & 0xc0) >> 6;
- long_list = cmd[1] & 0x20;
- format_data = cmd[1] & 0x10;
-
- if (format_data != 0) {
- if (format_prot_info != 0) {
- if (long_list == 0)
- parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
- else
- parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
- }
- } else if (format_data == 0 && format_prot_info != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- /* Get parm header from data-in/out buffer */
- /*
- * According to the translation spec, the only fields in the parameter
- * list we are concerned with are in the header. So allocate only that.
- */
- if (parm_hdr_len > 0) {
- res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
- format_prot_info, &nvme_pf_code);
- if (res)
- goto out;
- }
-
- /* Attempt to activate any previously downloaded firmware image */
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
-
- /* Determine Block size and count and send format command */
- res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
- if (res)
- goto out;
-
- res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
-
- out:
- return res;
-}
-
-static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
- struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- if (nvme_ctrl_ready(ns->ctrl))
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- NOT_READY, SCSI_ASC_LUN_NOT_READY,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- else
- return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-}
-
-static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u32 buffer_offset, parm_list_length;
- u8 buffer_id, mode;
-
- parm_list_length = get_unaligned_be24(&cmd[6]);
- if (parm_list_length % BYTES_TO_DWORDS != 0) {
- /* NVMe expects Firmware file to be a whole number of DWORDS */
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- buffer_id = cmd[2];
- if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- mode = cmd[1] & 0x1f;
- buffer_offset = get_unaligned_be24(&cmd[3]);
-
- switch (mode) {
- case DOWNLOAD_SAVE_ACTIVATE:
- res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
- parm_list_length, buffer_offset,
- buffer_id);
- if (res)
- goto out;
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
- break;
- case DOWNLOAD_SAVE_DEFER_ACTIVATE:
- res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
- parm_list_length, buffer_offset,
- buffer_id);
- break;
- case ACTIVATE_DEFERRED_MICROCODE:
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-struct scsi_unmap_blk_desc {
- __be64 slba;
- __be32 nlb;
- u32 resv;
-};
-
-struct scsi_unmap_parm_list {
- __be16 unmap_data_len;
- __be16 unmap_blk_desc_data_len;
- u32 resv;
- struct scsi_unmap_blk_desc desc[0];
-};
-
-static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- struct scsi_unmap_parm_list *plist;
- struct nvme_dsm_range *range;
- struct nvme_command c;
- int i, nvme_sc, res;
- u16 ndesc, list_len;
-
- list_len = get_unaligned_be16(&cmd[7]);
- if (!list_len)
- return -EINVAL;
-
- plist = kmalloc(list_len, GFP_KERNEL);
- if (!plist)
- return -ENOMEM;
-
- res = nvme_trans_copy_from_user(hdr, plist, list_len);
- if (res)
- goto out;
-
- ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
- if (!ndesc || ndesc > 256) {
- res = -EINVAL;
- goto out;
- }
-
- range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
- if (!range) {
- res = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < ndesc; i++) {
- range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
- range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
- range[i].cattr = 0;
- }
-
- memset(&c, 0, sizeof(c));
- c.dsm.opcode = nvme_cmd_dsm;
- c.dsm.nsid = cpu_to_le32(ns->ns_id);
- c.dsm.nr = cpu_to_le32(ndesc - 1);
- c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
- ndesc * sizeof(*range));
- res = nvme_trans_status_code(hdr, nvme_sc);
-
- kfree(range);
- out:
- kfree(plist);
- return res;
-}
-
-static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
-{
- u8 cmd[16];
- int retcode;
- unsigned int opcode;
-
- if (hdr->cmdp == NULL)
- return -EMSGSIZE;
- if (hdr->cmd_len > sizeof(cmd))
- return -EINVAL;
- if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
- return -EFAULT;
-
- /*
- * Prime the hdr with good status for scsi commands that don't require
- * an nvme command for translation.
- */
- retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
- if (retcode)
- return retcode;
-
- opcode = cmd[0];
-
- switch (opcode) {
- case READ_6:
- case READ_10:
- case READ_12:
- case READ_16:
- retcode = nvme_trans_io(ns, hdr, 0, cmd);
- break;
- case WRITE_6:
- case WRITE_10:
- case WRITE_12:
- case WRITE_16:
- retcode = nvme_trans_io(ns, hdr, 1, cmd);
- break;
- case INQUIRY:
- retcode = nvme_trans_inquiry(ns, hdr, cmd);
- break;
- case LOG_SENSE:
- retcode = nvme_trans_log_sense(ns, hdr, cmd);
- break;
- case MODE_SELECT:
- case MODE_SELECT_10:
- retcode = nvme_trans_mode_select(ns, hdr, cmd);
- break;
- case MODE_SENSE:
- case MODE_SENSE_10:
- retcode = nvme_trans_mode_sense(ns, hdr, cmd);
- break;
- case READ_CAPACITY:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
- break;
- case SERVICE_ACTION_IN_16:
- switch (cmd[1]) {
- case SAI_READ_CAPACITY_16:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
- break;
- default:
- goto out;
- }
- break;
- case REPORT_LUNS:
- retcode = nvme_trans_report_luns(ns, hdr, cmd);
- break;
- case REQUEST_SENSE:
- retcode = nvme_trans_request_sense(ns, hdr, cmd);
- break;
- case SYNCHRONIZE_CACHE:
- retcode = nvme_trans_synchronize_cache(ns, hdr);
- break;
- case FORMAT_UNIT:
- retcode = nvme_trans_format_unit(ns, hdr, cmd);
- break;
- case TEST_UNIT_READY:
- retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
- break;
- case WRITE_BUFFER:
- retcode = nvme_trans_write_buffer(ns, hdr, cmd);
- break;
- case UNMAP:
- retcode = nvme_trans_unmap(ns, hdr, cmd);
- break;
- default:
- out:
- retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- return retcode;
-}
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
-{
- struct sg_io_hdr hdr;
- int retcode;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
- return -EFAULT;
- if (hdr.interface_id != 'S')
- return -EINVAL;
-
- /*
- * A positive return code means a NVMe status, which has been
- * translated to sense data.
- */
- retcode = nvme_scsi_translate(ns, &hdr);
- if (retcode < 0)
- return retcode;
- if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
- return -EFAULT;
- return 0;
-}
-
-int nvme_sg_get_version_num(int __user *ip)
-{
- return put_user(sg_version_num, ip);
-}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f97006322..35f930db3c02 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ out:
static void nvmet_execute_identify_nslist(struct nvmet_req *req)
{
- static const int buf_size = 4096;
+ static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ out:
nvmet_req_complete(req, status);
}
+static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
+ void *id, off_t *off)
+{
+ struct nvme_ns_id_desc desc = {
+ .nidt = type,
+ .nidl = len,
+ };
+ u16 status;
+
+ status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
+ if (status)
+ return status;
+ *off += sizeof(desc);
+
+ status = nvmet_copy_to_sgl(req, *off, id, len);
+ if (status)
+ return status;
+ *off += len;
+
+ return 0;
+}
+
+static void nvmet_execute_identify_desclist(struct nvmet_req *req)
+{
+ struct nvmet_ns *ns;
+ u16 status = 0;
+ off_t off = 0;
+
+ ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ if (!ns) {
+ status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ goto out;
+ }
+
+ if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
+ status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
+ NVME_NIDT_UUID_LEN,
+ &ns->uuid, &off);
+ if (status)
+ goto out_put_ns;
+ }
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
+ status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
+ NVME_NIDT_NGUID_LEN,
+ &ns->nguid, &off);
+ if (status)
+ goto out_put_ns;
+ }
+
+ if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
+ off) != NVME_IDENTIFY_DATA_SIZE - off)
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+out_put_ns:
+ nvmet_put_namespace(ns);
+out:
+ nvmet_req_complete(req, status);
+}
+
/*
* A "mimimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
}
break;
case nvme_admin_identify:
- req->data_len = 4096;
+ req->data_len = NVME_IDENTIFY_DATA_SIZE;
switch (cmd->identify.cns) {
case NVME_ID_CNS_NS:
req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case NVME_ID_CNS_NS_ACTIVE_LIST:
req->execute = nvmet_execute_identify_nslist;
return 0;
+ case NVME_ID_CNS_NS_DESC_LIST:
+ req->execute = nvmet_execute_identify_desclist;
+ return 0;
}
break;
case nvme_admin_abort_cmd:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800078e2..a358ecd93e11 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -305,11 +305,41 @@ out_unlock:
CONFIGFS_ATTR(nvmet_ns_, device_path);
+static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
+}
+
+static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ struct nvmet_subsys *subsys = ns->subsys;
+ int ret = 0;
+
+
+ mutex_lock(&subsys->lock);
+ if (ns->enabled) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+
+ if (uuid_parse(page, &ns->uuid))
+ ret = -EINVAL;
+
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return ret ? ret : count;
+}
+
static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
}
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
const char *page, size_t count)
{
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
+ &nvmet_ns_attr_device_uuid,
&nvmet_ns_attr_enable,
NULL,
};
@@ -619,8 +650,45 @@ out_unlock:
CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+
+ if (NVME_TERTIARY(subsys->ver))
+ return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver),
+ (int)NVME_TERTIARY(subsys->ver));
+ else
+ return snprintf(page, PAGE_SIZE, "%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ int major, minor, tertiary = 0;
+ int ret;
+
+
+ ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+ if (ret != 2 && ret != 3)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ subsys->ver = NVME_VS(major, minor, tertiary);
+ up_write(&nvmet_config_sem);
+
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
+ &nvmet_subsys_attr_version,
NULL,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index eb9399ac97cf..b5b4ac103748 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
+ uuid_gen(&ns->uuid);
return ns;
}
@@ -926,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
if (!subsys)
return NULL;
- subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+ subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
switch (type) {
case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597e81fc..8f3b57b4c97b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
e->portid = port->disc_addr.portid;
/* we support only dynamic controllers */
e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
- e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+ e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
e->subtype = type;
memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
case nvme_admin_identify:
- req->data_len = 4096;
+ req->data_len = NVME_IDENTIFY_DATA_SIZE;
switch (cmd->identify.cns) {
case NVME_ID_CNS_CTRL:
req->execute =
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 2006fae61980..7692a96c9065 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2096,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
/* clear any response payload */
memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
+ fod->data_sg = NULL;
+ fod->data_sg_cnt = 0;
+
ret = nvmet_req_init(&fod->req,
&fod->queue->nvme_cq,
&fod->queue->nvme_sq,
&nvmet_fc_tgt_fcp_ops);
- if (!ret) { /* bad SQE content or invalid ctrl state */
- nvmet_fc_abort_op(tgtport, fod);
+ if (!ret) {
+ /* bad SQE content or invalid ctrl state */
+ /* nvmet layer has already called op done to send rsp. */
return;
}
/* keep a running counter of tail position */
atomic_inc(&fod->queue->sqtail);
- fod->data_sg = NULL;
- fod->data_sg_cnt = 0;
if (fod->total_length) {
ret = nvmet_fc_alloc_tgt_pgs(fod);
if (ret) {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 294a6611fb24..1bb9d5b311b1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *tgt_fcpreq)
{
struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
- int active;
/*
* mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
* after the abort request
*/
spin_lock(&tfcp_req->reqlock);
- active = tfcp_req->active;
tfcp_req->aborted = true;
spin_unlock(&tfcp_req->reqlock);
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d80fc8..40128793e613 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
struct nvmet_req *req = bio->bi_private;
nvmet_req_complete(req,
- bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
if (bio != &req->inline_bio)
bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
if (status) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else {
submit_bio(bio);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e503cfff0337..5f55c683b338 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
#include "../host/nvme.h"
#include "../host/fabrics.h"
-#define NVME_LOOP_AQ_DEPTH 256
-
#define NVME_LOOP_MAX_SEGMENTS 256
/*
@@ -31,7 +29,7 @@
*/
#define NVME_LOOP_NR_AEN_COMMANDS 1
#define NVME_LOOP_AQ_BLKMQ_DEPTH \
- (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
struct nvme_loop_iod {
struct nvme_request nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
};
struct nvme_loop_ctrl {
- spinlock_t lock;
struct nvme_loop_queue *queues;
u32 queue_count;
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
struct nvmet_ctrl *target_ctrl;
struct work_struct delete_work;
- struct work_struct reset_work;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
/* queue error recovery */
- schedule_work(&iod->queue->ctrl->reset_work);
+ nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
/* fail with DNR on admin cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
return BLK_EH_HANDLED;
}
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_loop_queue *queue = hctx->driver_data;
struct request *req = bd->rq;
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- int ret;
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &iod->cmd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
nvme_cleanup_cmd(req);
blk_mq_start_request(req);
nvme_loop_queue_response(&iod->req);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
if (blk_rq_bytes(req)) {
iod->sg_table.sgl = iod->first_sgl;
- ret = sg_alloc_table_chained(&iod->sg_table,
+ if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl);
- if (ret)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ iod->sg_table.sgl))
+ return BLK_STS_RESOURCE;
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(req);
schedule_work(&iod->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
unsigned int numa_node)
{
- return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req),
- hctx_idx + 1);
-}
+ struct nvme_loop_ctrl *ctrl = set->driver_data;
-static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set,
- struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
+ return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
+ (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
}
static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
.queue_rq = nvme_loop_queue_rq,
.complete = nvme_loop_complete_rq,
- .init_request = nvme_loop_init_admin_request,
+ .init_request = nvme_loop_init_request,
.init_hctx = nvme_loop_init_admin_hctx,
.timeout = nvme_loop_timeout,
};
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!schedule_work(&ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
static void nvme_loop_reset_ctrl_work(struct work_struct *work)
{
- struct nvme_loop_ctrl *ctrl = container_of(work,
- struct nvme_loop_ctrl, reset_work);
+ struct nvme_loop_ctrl *ctrl =
+ container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
bool changed;
int ret;
@@ -540,21 +530,6 @@ out_disable:
nvme_put_ctrl(&ctrl->ctrl);
}
-static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!schedule_work(&ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
-}
-
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.name = "loop",
.module = THIS_MODULE,
@@ -562,11 +537,9 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_loop_reset_ctrl,
.free_ctrl = nvme_loop_free_ctrl,
.submit_async_event = nvme_loop_submit_async_event,
.delete_ctrl = nvme_loop_del_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
};
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
INIT_LIST_HEAD(&ctrl->list);
INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
0 /* no quirks, we're perfect! */);
if (ret)
goto out_put_ctrl;
- spin_lock_init(&ctrl->lock);
-
ret = -ENOMEM;
ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
__nvme_loop_del_ctrl(ctrl);
mutex_unlock(&nvme_loop_ctrl_mutex);
- flush_scheduled_work();
+ flush_workqueue(nvme_wq);
}
module_init(nvme_loop_init_module);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8ff6e430b30a..747bbdb4f9c6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -47,6 +47,7 @@ struct nvmet_ns {
u32 blksize_shift;
loff_t size;
u8 nguid[16];
+ uuid_t uuid;
bool enabled;
struct nvmet_subsys *subsys;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9e45cde63376..56a4cba690b5 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
queue->send_queue_size = le16_to_cpu(req->hrqsize);
- if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+ if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
return NVME_RDMA_CM_INVALID_HSQSIZE;
/* XXX: Should we enforce some kind of max for IO queues? */
@@ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
/**
* nvme_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id: rdma_cm id, used for nvmet port
* @queue: nvmet rdma queue (cm id qp_context)
- * @addr: nvmet address (cm_id context)
*
* DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * to unplug. Note that this event can be generated on a normal
+ * queue cm_id and/or a device bound listener cm_id (where in this
+ * case queue will be null).
*
- * Note that this event can be generated on a normal queue cm_id
- * and/or a device bound listener cm_id (where in this case
- * queue will be null).
- *
- * we claim ownership on destroying the cm_id. For queues we move
- * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * We registered an ib_client to handle device removal for queues,
+ * so we only need to handle the listening port cm_ids. In this case
* we nullify the priv to prevent double cm_id destruction and destroying
* the cm_id implicitely by returning a non-zero rc to the callout.
*/
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
- unsigned long flags;
-
- if (!queue) {
- struct nvmet_port *port = cm_id->context;
+ struct nvmet_port *port;
+ if (queue) {
/*
- * This is a listener cm_id. Make sure that
- * future remove_port won't invoke a double
- * cm_id destroy. use atomic xchg to make sure
- * we don't compete with remove_port.
- */
- if (xchg(&port->priv, NULL) != cm_id)
- return 0;
- } else {
- /*
- * This is a queue cm_id. Make sure that
- * release queue will not destroy the cm_id
- * and schedule all ctrl queues removal (only
- * if the queue is not disconnecting already).
+ * This is a queue cm_id. we have registered
+ * an ib_client to handle queues removal
+ * so don't interfear and just return.
*/
- spin_lock_irqsave(&queue->state_lock, flags);
- if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
- queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
- spin_unlock_irqrestore(&queue->state_lock, flags);
- nvmet_rdma_queue_disconnect(queue);
- flush_scheduled_work();
+ return 0;
}
+ port = cm_id->context;
+
+ /*
+ * This is a listener cm_id. Make sure that
+ * future remove_port won't invoke a double
+ * cm_id destroy. use atomic xchg to make sure
+ * we don't compete with remove_port.
+ */
+ if (xchg(&port->priv, NULL) != cm_id)
+ return 0;
+
/*
* We need to return 1 so that the core will destroy
* it's own ID. What a great API design..
@@ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
.delete_ctrl = nvmet_rdma_delete_ctrl,
};
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+ struct nvmet_rdma_queue *queue;
+
+ /* Device is being removed, delete all queues using this device */
+ mutex_lock(&nvmet_rdma_queue_mutex);
+ list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
+ if (queue->dev->device != ib_device)
+ continue;
+
+ pr_info("Removing queue %d\n", queue->idx);
+ __nvmet_rdma_queue_disconnect(queue);
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+
+ flush_scheduled_work();
+}
+
+static struct ib_client nvmet_rdma_ib_client = {
+ .name = "nvmet_rdma",
+ .add = nvmet_rdma_add_one,
+ .remove = nvmet_rdma_remove_one
+};
+
static int __init nvmet_rdma_init(void)
{
- return nvmet_register_transport(&nvmet_rdma_ops);
+ int ret;
+
+ ret = ib_register_client(&nvmet_rdma_ib_client);
+ if (ret)
+ return ret;
+
+ ret = nvmet_register_transport(&nvmet_rdma_ops);
+ if (ret)
+ goto err_ib_client;
+
+ return 0;
+
+err_ib_client:
+ ib_unregister_client(&nvmet_rdma_ib_client);
+ return ret;
}
static void __exit nvmet_rdma_exit(void)
@@ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
mutex_unlock(&nvmet_rdma_queue_mutex);
flush_scheduled_work();
+ ib_unregister_client(&nvmet_rdma_ib_client);
ida_destroy(&nvmet_rdma_queue_ida);
}
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5efc11..b7cbd5d2cdea 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
*/
if (basedev->state < DASD_STATE_READY) {
while ((req = blk_fetch_request(block->request_queue)))
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
"Rejecting write request %p",
req);
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
"Rejecting failfast request %p",
req);
blk_start_request(req);
- __blk_end_request_all(req, -ETIMEDOUT);
+ __blk_end_request_all(req, BLK_STS_TIMEOUT);
continue;
}
cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
"on request %p",
PTR_ERR(cqr), req);
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
/*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
int status;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
+
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
- error = status;
+ error = errno_to_blk_status(status);
else if (status == 0) {
- if (cqr->intrc == -EPERM)
- error = -EBADE;
- else if (cqr->intrc == -ENOLINK ||
- cqr->intrc == -ETIMEDOUT)
- error = cqr->intrc;
- else
- error = -EIO;
+ switch (cqr->intrc) {
+ case -EPERM:
+ error = BLK_STS_NEXUS;
+ break;
+ case -ENOLINK:
+ error = BLK_STS_TRANSPORT;
+ break;
+ case -ETIMEDOUT:
+ error = BLK_STS_TIMEOUT;
+ break;
+ default:
+ error = BLK_STS_IOERR;
+ break;
+ }
}
__blk_end_request_all(req, error);
}
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
spin_lock_irq(&block->request_queue_lock);
while ((req = blk_fetch_request(block->request_queue)))
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
spin_unlock_irq(&block->request_queue_lock);
}
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280af3e4..06eb1de52d1c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
unsigned long source_addr;
unsigned long bytes_done;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de6817875..3c2c84b72877 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
aob->request.data = (u64) aobrq;
scmrq->bdev = bdev;
scmrq->retries = 4;
- scmrq->error = 0;
+ scmrq->error = BLK_STS_OK;
/* We don't use all msbs - place aidaws at the end of the aob page. */
scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
{
struct aob *aob = scmrq->aob;
- if (scmrq->error == -ETIMEDOUT)
+ if (scmrq->error == BLK_STS_TIMEOUT)
SCM_LOG(1, "Request timeout");
else {
SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
scmrq->error);
}
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
struct scm_request *scmrq = data;
struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
struct scm_blk_dev *bdev = scmrq->bdev;
unsigned long flags;
- if (scmrq->error != -EIO)
+ if (scmrq->error != BLK_STS_IOERR)
goto restart;
/* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cdc5129..cd598d1a4eae 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
struct aob *aob;
struct list_head list;
u8 retries;
- int error;
+ blk_status_t error;
#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
struct {
enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
void scm_blk_dev_cleanup(struct scm_blk_dev *);
void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
void scm_request_finish(struct scm_request *);
void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e755c8a3..a48f0d40c1d2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
unsigned long page_addr;
unsigned long bytes;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if ((bio->bi_iter.bi_sector & 7) != 0 ||
(bio->bi_iter.bi_size & 4095) != 0)
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc7f644..0f11f3bcac82 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
struct eadm_private *private = get_eadm_private(sch);
struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
struct irb *irb = this_cpu_ptr(&cio_irb);
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
EADM_LOG(6, "irq");
EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
&& scsw->eswf == 1 && irb->esw.eadm.erw.r)
- error = -EIO;
+ error = BLK_STS_IOERR;
if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
- error = -ETIMEDOUT;
+ error = BLK_STS_TIMEOUT;
eadm_subchannel_set_timeout(sch, 0);
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268edc54ae..1fa53ecdc2aa 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
}
EXPORT_SYMBOL_GPL(scm_driver_unregister);
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
{
struct aob_rq_header *aobrq = (void *) aob->request.data;
struct scm_device *scmdev = aobrq->scmdev;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9dc893e..14f377ac1280 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
struct jsfd_part *jdp = req->rq_disk->private_data;
unsigned long offset = blk_rq_pos(req) << 9;
size_t len = blk_rq_cur_bytes(req);
- int err = -EIO;
+ blk_status_t err = BLK_STS_IOERR;
if ((offset + len) > jdp->dsize)
goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
}
jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
- err = 0;
+ err = BLK_STS_OK;
end:
if (!__blk_end_request_cur(req, err))
req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
put_disk(disk);
goto out;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
jsfd_disk[i] = disk;
}
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 8a1b94816419..a4f28b7e4c65 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
* code paths.
*/
if (unlikely(rq->bio))
- blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+ blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
else
blk_put_request(rq);
}
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
EXPORT_SYMBOL(osd_end_request);
static void _set_error_resid(struct osd_request *or, struct request *req,
- int error)
+ blk_status_t error)
{
or->async_error = error;
- or->req_errors = scsi_req(req)->result ? : error;
+ or->req_errors = scsi_req(req)->result;
or->sense_len = scsi_req(req)->sense_len;
if (or->sense_len)
memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
int osd_execute_request(struct osd_request *or)
{
- int error;
-
blk_execute_rq(or->request->q, NULL, or->request, 0);
- error = scsi_req(or->request)->result ? -EIO : 0;
- _set_error_resid(or, or->request, error);
- return error;
+ if (scsi_req(or->request)->result) {
+ _set_error_resid(or, or->request, BLK_STS_IOERR);
+ return -EIO;
+ }
+
+ _set_error_resid(or, or->request, BLK_STS_OK);
+ return 0;
}
EXPORT_SYMBOL(osd_execute_request);
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
{
struct osd_request *or = req->end_io_data;
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
flags);
if (IS_ERR(req))
return req;
- scsi_req_init(req);
for_each_bio(bio) {
- struct bio *bounce_bio = bio;
-
- blk_queue_bounce(req->q, &bounce_bio);
- ret = blk_rq_append_bio(req, bounce_bio);
+ ret = blk_rq_append_bio(req, bio);
if (ret)
return ERR_PTR(ret);
}
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
ret = PTR_ERR(req);
goto out;
}
- scsi_req_init(req);
or->in.req = or->request->next_rq = req;
}
} else if (has_in)
@@ -1914,7 +1911,7 @@ analyze:
/* scsi sense is Empty, the request was never issued to target
* linux return code might tell us what happened.
*/
- if (or->async_error == -ENOMEM)
+ if (or->async_error == BLK_STS_RESOURCE)
osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
else
osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed92f07d..929ee7e88120 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
/* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
{
struct scsi_request *rq = scsi_req(req);
struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
return DRIVER_ERROR << 24;
rq = scsi_req(req);
- scsi_req_init(req);
req->rq_flags |= RQF_QUIET;
SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07dab893d..304a7158540f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
}
}
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
{
__blk_put_request(req->q, req);
}
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
if (IS_ERR(req))
return;
rq = scsi_req(req);
- scsi_req_init(req);
rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
rq->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 99e16ac479e3..550e29f903b7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
if (IS_ERR(req))
return ret;
rq = scsi_req(req);
- scsi_req_init(req);
if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
buffer, bufflen, __GFP_RECLAIM))
@@ -635,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
cmd->request->next_rq->special = NULL;
}
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
unsigned int bytes, unsigned int bidi_bytes)
{
struct scsi_cmnd *cmd = req->special;
@@ -694,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
* @cmd: SCSI command (unused)
* @result: scsi error code
*
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK temporary transport failure
- * -EREMOTEIO permanent target failure, do not retry
- * -EBADE permanent nexus failure, retry on other path
- * -ENOSPC No write space available
- * -ENODATA Medium error
- * -EIO unspecified I/O error
+ * Translate SCSI error code into block errors.
*/
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+ int result)
{
- int error = 0;
-
- switch(host_byte(result)) {
+ switch (host_byte(result)) {
case DID_TRANSPORT_FAILFAST:
- error = -ENOLINK;
- break;
+ return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
set_host_byte(cmd, DID_OK);
- error = -EREMOTEIO;
- break;
+ return BLK_STS_TARGET;
case DID_NEXUS_FAILURE:
- set_host_byte(cmd, DID_OK);
- error = -EBADE;
- break;
+ return BLK_STS_NEXUS;
case DID_ALLOC_FAILURE:
set_host_byte(cmd, DID_OK);
- error = -ENOSPC;
- break;
+ return BLK_STS_NOSPC;
case DID_MEDIUM_ERROR:
set_host_byte(cmd, DID_OK);
- error = -ENODATA;
- break;
+ return BLK_STS_MEDIUM;
default:
- error = -EIO;
- break;
+ return BLK_STS_IOERR;
}
-
- return error;
}
/*
@@ -769,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
int result = cmd->result;
struct request_queue *q = cmd->device->request_queue;
struct request *req = cmd->request;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
struct scsi_sense_hdr sshdr;
bool sense_valid = false;
int sense_deferred = 0, level = 0;
@@ -808,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
* both sides at once.
*/
scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
- if (scsi_end_request(req, 0, blk_rq_bytes(req),
+ if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
blk_rq_bytes(req->next_rq)))
BUG();
return;
@@ -850,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
scsi_print_sense(cmd);
result = 0;
/* for passthrough error may be set */
- error = 0;
+ error = BLK_STS_OK;
}
/*
@@ -922,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
action = ACTION_REPREP;
} else if (sshdr.asc == 0x10) /* DIX */ {
action = ACTION_FAIL;
- error = -EILSEQ;
+ error = BLK_STS_PROTECTION;
/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
action = ACTION_FAIL;
- error = -EREMOTEIO;
+ error = BLK_STS_TARGET;
} else
action = ACTION_FAIL;
break;
case ABORTED_COMMAND:
action = ACTION_FAIL;
if (sshdr.asc == 0x10) /* DIF */
- error = -EILSEQ;
+ error = BLK_STS_PROTECTION;
break;
case NOT_READY:
/* If the device is in the process of becoming
@@ -1134,6 +1116,20 @@ err_exit:
}
EXPORT_SYMBOL(scsi_init_io);
+/**
+ * scsi_initialize_rq - initialize struct scsi_cmnd.req
+ *
+ * Called from inside blk_get_request().
+ */
+void scsi_initialize_rq(struct request *rq)
+{
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+ scsi_req_init(&cmd->req);
+}
+EXPORT_SYMBOL(scsi_initialize_rq);
+
+/* Called after a request has been started. */
void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
{
void *buf = cmd->sense_buffer;
@@ -1829,15 +1825,15 @@ out_delay:
blk_delay_queue(q, SCSI_QUEUE_DELAY);
}
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
{
switch (ret) {
case BLKPREP_OK:
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
case BLKPREP_DEFER:
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
default:
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
}
@@ -1909,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
blk_mq_complete_request(cmd->request);
}
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
@@ -1917,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost = sdev->host;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
- int ret;
+ blk_status_t ret;
int reason;
ret = prep_to_mq(scsi_prep_state_check(sdev, req));
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret != BLK_STS_OK)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!get_device(&sdev->sdev_gendev))
goto out;
@@ -1937,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!(req->rq_flags & RQF_DONTPREP)) {
ret = prep_to_mq(scsi_mq_prep_fn(req));
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret != BLK_STS_OK)
goto out_dec_host_busy;
req->rq_flags |= RQF_DONTPREP;
} else {
@@ -1955,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
reason = scsi_dispatch_cmd(cmd);
if (reason) {
scsi_set_blocked(cmd, reason);
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
goto out_dec_host_busy;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_dec_host_busy:
atomic_dec(&shost->host_busy);
@@ -1972,12 +1968,14 @@ out_put_device:
put_device(&sdev->sdev_gendev);
out:
switch (ret) {
- case BLK_MQ_RQ_QUEUE_BUSY:
+ case BLK_STS_OK:
+ break;
+ case BLK_STS_RESOURCE:
if (atomic_read(&sdev->device_busy) == 0 &&
!scsi_device_blocked(sdev))
blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
break;
- case BLK_MQ_RQ_QUEUE_ERROR:
+ default:
/*
* Make sure to release all allocated ressources when
* we hit an error, as we will never see this command
@@ -1986,8 +1984,6 @@ out:
if (req->rq_flags & RQF_DONTPREP)
scsi_mq_uninit_cmd(cmd);
break;
- default:
- break;
}
return ret;
}
@@ -2057,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
{
struct device *dev = shost->dma_dev;
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
/*
* this limit is imposed by hardware restrictions
*/
@@ -2139,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
q->request_fn = scsi_request_fn;
q->init_rq_fn = scsi_init_rq;
q->exit_rq_fn = scsi_exit_rq;
+ q->initialize_rq_fn = scsi_initialize_rq;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
@@ -2163,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
#endif
.init_request = scsi_init_request,
.exit_request = scsi_exit_request,
+ .initialize_rq_fn = scsi_initialize_rq,
.map_queues = scsi_map_queues,
};
@@ -2977,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
if (wait)
blk_mq_quiesce_queue(q);
else
- blk_mq_stop_hw_queues(q);
+ blk_mq_quiesce_queue_nowait(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
@@ -3031,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
return -EINVAL;
if (q->mq_ops) {
- blk_mq_start_stopped_hw_queues(q, false);
+ blk_mq_unquiesce_queue(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_start_queue(q);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1bb908..5006a656e16a 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
#include <linux/bsg.h>
#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_request.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
struct sas_rphy *rphy)
{
struct request *req;
- int ret;
+ blk_status_t ret;
int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
+ q->initialize_rq_fn = scsi_initialize_rq;
q->cmd_size = sizeof(struct scsi_request);
if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
if (error)
goto out_cleanup_queue;
+ /*
+ * by default assume old behaviour and bounce for any highmem page
+ */
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
error = bsg_register_queue(q, dev, name, release);
if (error)
goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
q->queuedata = shost;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
return 0;
out_cleanup_queue:
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82c33a6edbea..21225d62b0c1 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
} Sg_device;
/* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
static int sg_start_req(Sg_request *srp, unsigned char *cmd);
static int sg_finish_rem_req(Sg_request * srp);
static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
if (atomic_read(&sdp->detaching)) {
if (srp->bio) {
scsi_req_free_cmd(scsi_req(srp->rq));
- blk_end_request_all(srp->rq, -EIO);
+ blk_end_request_all(srp->rq, BLK_STS_IOERR);
srp->rq = NULL;
}
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
* level when a command is completed (or has failed).
*/
static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
{
struct sg_request *srp = rq->end_io_data;
struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
}
req = scsi_req(rq);
- scsi_req_init(rq);
-
if (hp->cmd_len > BLK_MAX_CDB)
req->cmd = long_cmdp;
memcpy(req->cmd, cmd, hp->cmd_len);
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6f5437..8e5013d9cad4 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
atomic64_dec(&STp->stats->in_flight);
}
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
{
struct st_request *SRpnt = req->end_io_data;
struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
if (IS_ERR(req))
return DRIVER_ERROR << 24;
rq = scsi_req(req);
- scsi_req_init(req);
req->rq_flags |= RQF_QUIET;
mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069ebe4aa6..c05d38016556 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
return -EINVAL;
}
- ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+ ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!ib_dev->ibd_bio_set) {
pr_err("IBLOCK: Unable to create bioset\n");
goto out;
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
struct se_cmd *cmd = bio->bi_private;
struct iblock_req *ibr = cmd->priv;
- if (bio->bi_error) {
- pr_err("bio error: %p, err: %d\n", bio, bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("bio error: %p, err: %d\n", bio, bio->bi_status);
/*
* Bump the ib_bio_err_cnt and release bio.
*/
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
{
struct se_cmd *cmd = bio->bi_private;
- if (bio->bi_error)
- pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
if (cmd) {
- if (bio->bi_error)
+ if (bio->bi_status)
target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
else
target_complete_cmd(cmd, SAM_STAT_GOOD);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb13f8ea..ceec0211e84e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
}
static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
/* pscsi_attach_hba():
*
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
goto fail;
}
- scsi_req_init(req);
-
if (sgl) {
ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
return 0;
}
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
{
struct se_cmd *cmd = req->end_io_data;
struct pscsi_plugin_task *pt = cmd->priv;