summaryrefslogtreecommitdiff
path: root/drivers/scsi/scsi_lib.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-21 10:57:33 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-21 10:57:33 -0800
commit772c8f6f3bbd3ceb94a89373473083e3e1113554 (patch)
treed2b34e8f1841a169d59adf53074de217a9e0f977 /drivers/scsi/scsi_lib.c
parentfd4a61e08aa79f2b7835b25c6f94f27bd2d65990 (diff)
parent818551e2b2c662a1b26de6b4f7d6b8411a838d18 (diff)
Merge tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: - blk-mq scheduling framework from me and Omar, with a port of the deadline scheduler for this framework. A port of BFQ from Paolo is in the works, and should be ready for 4.12. - Various fixups and improvements to the above scheduling framework from Omar, Paolo, Bart, me, others. - Cleanup of the exported sysfs blk-mq data into debugfs, from Omar. This allows us to export more information that helps debug hangs or performance issues, without cluttering or abusing the sysfs API. - Fixes for the sbitmap code, the scalable bitmap code that was migrated from blk-mq, from Omar. - Removal of the BLOCK_PC support in struct request, and refactoring of carrying SCSI payloads in the block layer. This cleans up the code nicely, and enables us to kill the SCSI specific parts of struct request, shrinking it down nicely. From Christoph mainly, with help from Hannes. - Support for ranged discard requests and discard merging, also from Christoph. - Support for OPAL in the block layer, and for NVMe as well. Mainly from Scott Bauer, with fixes/updates from various others folks. - Error code fixup for gdrom from Christophe. - cciss pci irq allocation cleanup from Christoph. - Making the cdrom device operations read only, from Kees Cook. - Fixes for duplicate bdi registrations and bdi/queue life time problems from Jan and Dan. - Set of fixes and updates for lightnvm, from Matias and Javier. - A few fixes for nbd from Josef, using idr to name devices and a workqueue deadlock fix on receive. Also marks Josef as the current maintainer of nbd. - Fix from Josef, overwriting queue settings when the number of hardware queues is updated for a blk-mq device. - NVMe fix from Keith, ensuring that we don't repeatedly mark and IO aborted, if we didn't end up aborting it. - SG gap merging fix from Ming Lei for block. - Loop fix also from Ming, fixing a race and crash between setting loop status and IO. - Two block race fixes from Tahsin, fixing request list iteration and fixing a race between device registration and udev device add notifiations. - Double free fix from cgroup writeback, from Tejun. - Another double free fix in blkcg, from Hou Tao. - Partition overflow fix for EFI from Alden Tondettar. * tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block: (156 commits) nvme: Check for Security send/recv support before issuing commands. block/sed-opal: allocate struct opal_dev dynamically block/sed-opal: tone down not supported warnings block: don't defer flushes on blk-mq + scheduling blk-mq-sched: ask scheduler for work, if we failed dispatching leftovers blk-mq: don't special case flush inserts for blk-mq-sched blk-mq-sched: don't add flushes to the head of requeue queue blk-mq: have blk_mq_dispatch_rq_list() return if we queued IO or not block: do not allow updates through sysfs until registration completes lightnvm: set default lun range when no luns are specified lightnvm: fix off-by-one error on target initialization Maintainers: Modify SED list from nvme to block Move stack parameters for sed_ioctl to prevent oversized stack with CONFIG_KASAN uapi: sed-opal fix IOW for activate lsp to use correct struct cdrom: Make device operations read-only elevator: fix loading wrong elevator type for blk-mq devices cciss: switch to pci_irq_alloc_vectors block/loop: fix race between I/O and set_status blk-mq-sched: don't hold queue_lock when calling exit_icq block: set make_request_fn manually in blk_mq_update_nr_hw_queues ...
Diffstat (limited to 'drivers/scsi/scsi_lib.c')
-rw-r--r--drivers/scsi/scsi_lib.c264
1 files changed, 176 insertions, 88 deletions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 78db07fd8055..912fbc3b4543 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -37,8 +37,59 @@
#include "scsi_priv.h"
#include "scsi_logging.h"
+static struct kmem_cache *scsi_sdb_cache;
+static struct kmem_cache *scsi_sense_cache;
+static struct kmem_cache *scsi_sense_isadma_cache;
+static DEFINE_MUTEX(scsi_sense_cache_mutex);
-struct kmem_cache *scsi_sdb_cache;
+static inline struct kmem_cache *
+scsi_select_sense_cache(struct Scsi_Host *shost)
+{
+ return shost->unchecked_isa_dma ?
+ scsi_sense_isadma_cache : scsi_sense_cache;
+}
+
+static void scsi_free_sense_buffer(struct Scsi_Host *shost,
+ unsigned char *sense_buffer)
+{
+ kmem_cache_free(scsi_select_sense_cache(shost), sense_buffer);
+}
+
+static unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost,
+ gfp_t gfp_mask, int numa_node)
+{
+ return kmem_cache_alloc_node(scsi_select_sense_cache(shost), gfp_mask,
+ numa_node);
+}
+
+int scsi_init_sense_cache(struct Scsi_Host *shost)
+{
+ struct kmem_cache *cache;
+ int ret = 0;
+
+ cache = scsi_select_sense_cache(shost);
+ if (cache)
+ return 0;
+
+ mutex_lock(&scsi_sense_cache_mutex);
+ if (shost->unchecked_isa_dma) {
+ scsi_sense_isadma_cache =
+ kmem_cache_create("scsi_sense_cache(DMA)",
+ SCSI_SENSE_BUFFERSIZE, 0,
+ SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
+ if (!scsi_sense_isadma_cache)
+ ret = -ENOMEM;
+ } else {
+ scsi_sense_cache =
+ kmem_cache_create("scsi_sense_cache",
+ SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!scsi_sense_cache)
+ ret = -ENOMEM;
+ }
+
+ mutex_unlock(&scsi_sense_cache_mutex);
+ return ret;
+}
/*
* When to reinvoke queueing after a resource shortage. It's 3 msecs to
@@ -168,22 +219,23 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
req_flags_t rq_flags, int *resid)
{
struct request *req;
- int write = (data_direction == DMA_TO_DEVICE);
+ struct scsi_request *rq;
int ret = DRIVER_ERROR << 24;
- req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM);
+ req = blk_get_request(sdev->request_queue,
+ data_direction == DMA_TO_DEVICE ?
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
if (IS_ERR(req))
return ret;
- blk_rq_set_block_pc(req);
+ rq = scsi_req(req);
+ scsi_req_init(req);
if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
buffer, bufflen, __GFP_RECLAIM))
goto out;
- req->cmd_len = COMMAND_SIZE(cmd[0]);
- memcpy(req->cmd, cmd, req->cmd_len);
- req->sense = sense;
- req->sense_len = 0;
+ rq->cmd_len = COMMAND_SIZE(cmd[0]);
+ memcpy(rq->cmd, cmd, rq->cmd_len);
req->retries = retries;
req->timeout = timeout;
req->cmd_flags |= flags;
@@ -200,11 +252,13 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
* is invalid. Prevent the garbage from being misinterpreted
* and prevent security leaks by zeroing out the excess data.
*/
- if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen))
- memset(buffer + (bufflen - req->resid_len), 0, req->resid_len);
+ if (unlikely(rq->resid_len > 0 && rq->resid_len <= bufflen))
+ memset(buffer + (bufflen - rq->resid_len), 0, rq->resid_len);
if (resid)
- *resid = req->resid_len;
+ *resid = rq->resid_len;
+ if (sense && rq->sense_len)
+ memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
ret = req->errors;
out:
blk_put_request(req);
@@ -529,7 +583,7 @@ void scsi_run_host_queues(struct Scsi_Host *shost)
static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
{
- if (cmd->request->cmd_type == REQ_TYPE_FS) {
+ if (!blk_rq_is_passthrough(cmd->request)) {
struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
if (drv->uninit_command)
@@ -645,14 +699,13 @@ static bool scsi_end_request(struct request *req, int error,
if (bidi_bytes)
scsi_release_bidi_buffers(cmd);
+ scsi_release_buffers(cmd);
+ scsi_put_command(cmd);
spin_lock_irqsave(q->queue_lock, flags);
blk_finish_request(req, error);
spin_unlock_irqrestore(q->queue_lock, flags);
- scsi_release_buffers(cmd);
-
- scsi_put_command(cmd);
scsi_run_queue(q);
}
@@ -754,18 +807,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
sense_deferred = scsi_sense_is_deferred(&sshdr);
}
- if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
+ if (blk_rq_is_passthrough(req)) {
if (result) {
- if (sense_valid && req->sense) {
+ if (sense_valid) {
/*
* SG_IO wants current and deferred errors
*/
- int len = 8 + cmd->sense_buffer[7];
-
- if (len > SCSI_SENSE_BUFFERSIZE)
- len = SCSI_SENSE_BUFFERSIZE;
- memcpy(req->sense, cmd->sense_buffer, len);
- req->sense_len = len;
+ scsi_req(req)->sense_len =
+ min(8 + cmd->sense_buffer[7],
+ SCSI_SENSE_BUFFERSIZE);
}
if (!sense_deferred)
error = __scsi_error_from_host_byte(cmd, result);
@@ -775,14 +825,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
*/
req->errors = cmd->result;
- req->resid_len = scsi_get_resid(cmd);
+ scsi_req(req)->resid_len = scsi_get_resid(cmd);
if (scsi_bidi_cmnd(cmd)) {
/*
* Bidi commands Must be complete as a whole,
* both sides at once.
*/
- req->next_rq->resid_len = scsi_in(cmd)->resid;
+ scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
if (scsi_end_request(req, 0, blk_rq_bytes(req),
blk_rq_bytes(req->next_rq)))
BUG();
@@ -790,15 +840,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
}
} else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) {
/*
- * Certain non BLOCK_PC requests are commands that don't
- * actually transfer anything (FLUSH), so cannot use
+ * Flush commands do not transfers any data, and thus cannot use
* good_bytes != blk_rq_bytes(req) as the signal for an error.
* This sets the error explicitly for the problem case.
*/
error = __scsi_error_from_host_byte(cmd, result);
}
- /* no bidi support for !REQ_TYPE_BLOCK_PC yet */
+ /* no bidi support for !blk_rq_is_passthrough yet */
BUG_ON(blk_bidi_rq(req));
/*
@@ -810,8 +859,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
blk_rq_sectors(req), good_bytes));
/*
- * Recovered errors need reporting, but they're always treated
- * as success, so fiddle the result code here. For BLOCK_PC
+ * Recovered errors need reporting, but they're always treated as
+ * success, so fiddle the result code here. For passthrough requests
* we already took a copy of the original into rq->errors which
* is what gets returned to the user
*/
@@ -825,7 +874,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
else if (!(req->rq_flags & RQF_QUIET))
scsi_print_sense(cmd);
result = 0;
- /* BLOCK_PC may have set error */
+ /* for passthrough error may be set */
error = 0;
}
@@ -1110,42 +1159,33 @@ err_exit:
}
EXPORT_SYMBOL(scsi_init_io);
-static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
- struct request *req)
+void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
{
- struct scsi_cmnd *cmd;
-
- if (!req->special) {
- /* Bail if we can't get a reference to the device */
- if (!get_device(&sdev->sdev_gendev))
- return NULL;
-
- cmd = scsi_get_command(sdev, GFP_ATOMIC);
- if (unlikely(!cmd)) {
- put_device(&sdev->sdev_gendev);
- return NULL;
- }
- req->special = cmd;
- } else {
- cmd = req->special;
- }
+ void *buf = cmd->sense_buffer;
+ void *prot = cmd->prot_sdb;
+ unsigned long flags;
- /* pull a tag out of the request if we have one */
- cmd->tag = req->tag;
- cmd->request = req;
+ /* zero out the cmd, except for the embedded scsi_request */
+ memset((char *)cmd + sizeof(cmd->req), 0,
+ sizeof(*cmd) - sizeof(cmd->req));
- cmd->cmnd = req->cmd;
- cmd->prot_op = SCSI_PROT_NORMAL;
+ cmd->device = dev;
+ cmd->sense_buffer = buf;
+ cmd->prot_sdb = prot;
+ INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
+ cmd->jiffies_at_alloc = jiffies;
- return cmd;
+ spin_lock_irqsave(&dev->list_lock, flags);
+ list_add_tail(&cmd->list, &dev->cmd_list);
+ spin_unlock_irqrestore(&dev->list_lock, flags);
}
-static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
+static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
{
struct scsi_cmnd *cmd = req->special;
/*
- * BLOCK_PC requests may transfer data, in which case they must
+ * Passthrough requests may transfer data, in which case they must
* a bio attached to them. Or they might contain a SCSI command
* that does not transfer data, in which case they may optionally
* submit a request without an attached bio.
@@ -1160,14 +1200,15 @@ static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
memset(&cmd->sdb, 0, sizeof(cmd->sdb));
}
- cmd->cmd_len = req->cmd_len;
+ cmd->cmd_len = scsi_req(req)->cmd_len;
+ cmd->cmnd = scsi_req(req)->cmd;
cmd->transfersize = blk_rq_bytes(req);
cmd->allowed = req->retries;
return BLKPREP_OK;
}
/*
- * Setup a REQ_TYPE_FS command. These are simple request from filesystems
+ * Setup a normal block command. These are simple request from filesystems
* that still need to be translated to SCSI CDBs from the ULD.
*/
static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
@@ -1180,6 +1221,7 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
return ret;
}
+ cmd->cmnd = scsi_req(req)->cmd = scsi_req(req)->__cmd;
memset(cmd->cmnd, 0, BLK_MAX_CDB);
return scsi_cmd_to_driver(cmd)->init_command(cmd);
}
@@ -1195,14 +1237,10 @@ static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
else
cmd->sc_data_direction = DMA_FROM_DEVICE;
- switch (req->cmd_type) {
- case REQ_TYPE_FS:
+ if (blk_rq_is_scsi(req))
+ return scsi_setup_scsi_cmnd(sdev, req);
+ else
return scsi_setup_fs_cmnd(sdev, req);
- case REQ_TYPE_BLOCK_PC:
- return scsi_setup_blk_pc_cmnd(sdev, req);
- default:
- return BLKPREP_KILL;
- }
}
static int
@@ -1298,19 +1336,28 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
struct scsi_device *sdev = q->queuedata;
- struct scsi_cmnd *cmd;
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
int ret;
ret = scsi_prep_state_check(sdev, req);
if (ret != BLKPREP_OK)
goto out;
- cmd = scsi_get_cmd_from_req(sdev, req);
- if (unlikely(!cmd)) {
- ret = BLKPREP_DEFER;
- goto out;
+ if (!req->special) {
+ /* Bail if we can't get a reference to the device */
+ if (unlikely(!get_device(&sdev->sdev_gendev))) {
+ ret = BLKPREP_DEFER;
+ goto out;
+ }
+
+ scsi_init_command(sdev, cmd);
+ req->special = cmd;
}
+ cmd->tag = req->tag;
+ cmd->request = req;
+ cmd->prot_op = SCSI_PROT_NORMAL;
+
ret = scsi_setup_cmnd(sdev, req);
out:
return scsi_prep_return(q, req, ret);
@@ -1827,7 +1874,9 @@ static int scsi_mq_prep_fn(struct request *req)
unsigned char *sense_buf = cmd->sense_buffer;
struct scatterlist *sg;
- memset(cmd, 0, sizeof(struct scsi_cmnd));
+ /* zero out the cmd, except for the embedded scsi_request */
+ memset((char *)cmd + sizeof(cmd->req), 0,
+ sizeof(*cmd) - sizeof(cmd->req));
req->special = cmd;
@@ -1837,7 +1886,6 @@ static int scsi_mq_prep_fn(struct request *req)
cmd->tag = req->tag;
- cmd->cmnd = req->cmd;
cmd->prot_op = SCSI_PROT_NORMAL;
INIT_LIST_HEAD(&cmd->list);
@@ -1912,7 +1960,6 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!scsi_host_queue_ready(q, shost, sdev))
goto out_dec_target_busy;
-
if (!(req->rq_flags & RQF_DONTPREP)) {
ret = prep_to_mq(scsi_mq_prep_fn(req));
if (ret != BLK_MQ_RQ_QUEUE_OK)
@@ -1982,21 +2029,24 @@ static int scsi_init_request(void *data, struct request *rq,
unsigned int hctx_idx, unsigned int request_idx,
unsigned int numa_node)
{
+ struct Scsi_Host *shost = data;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
- cmd->sense_buffer = kzalloc_node(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL,
- numa_node);
+ cmd->sense_buffer =
+ scsi_alloc_sense_buffer(shost, GFP_KERNEL, numa_node);
if (!cmd->sense_buffer)
return -ENOMEM;
+ cmd->req.sense = cmd->sense_buffer;
return 0;
}
static void scsi_exit_request(void *data, struct request *rq,
unsigned int hctx_idx, unsigned int request_idx)
{
+ struct Scsi_Host *shost = data;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
- kfree(cmd->sense_buffer);
+ scsi_free_sense_buffer(shost, cmd->sense_buffer);
}
static int scsi_map_queues(struct blk_mq_tag_set *set)
@@ -2029,7 +2079,7 @@ static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
return bounce_limit;
}
-static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
+void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
{
struct device *dev = shost->dma_dev;
@@ -2064,28 +2114,64 @@ static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
*/
blk_queue_dma_alignment(q, 0x03);
}
+EXPORT_SYMBOL_GPL(__scsi_init_queue);
-struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
- request_fn_proc *request_fn)
+static int scsi_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
{
- struct request_queue *q;
+ struct Scsi_Host *shost = q->rq_alloc_data;
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
- q = blk_init_queue(request_fn, NULL);
- if (!q)
- return NULL;
- __scsi_init_queue(shost, q);
- return q;
+ memset(cmd, 0, sizeof(*cmd));
+
+ cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp, NUMA_NO_NODE);
+ if (!cmd->sense_buffer)
+ goto fail;
+ cmd->req.sense = cmd->sense_buffer;
+
+ if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
+ cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp);
+ if (!cmd->prot_sdb)
+ goto fail_free_sense;
+ }
+
+ return 0;
+
+fail_free_sense:
+ scsi_free_sense_buffer(shost, cmd->sense_buffer);
+fail:
+ return -ENOMEM;
+}
+
+static void scsi_exit_rq(struct request_queue *q, struct request *rq)
+{
+ struct Scsi_Host *shost = q->rq_alloc_data;
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+ if (cmd->prot_sdb)
+ kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
+ scsi_free_sense_buffer(shost, cmd->sense_buffer);
}
-EXPORT_SYMBOL(__scsi_alloc_queue);
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
+ struct Scsi_Host *shost = sdev->host;
struct request_queue *q;
- q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
if (!q)
return NULL;
+ q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
+ q->rq_alloc_data = shost;
+ q->request_fn = scsi_request_fn;
+ q->init_rq_fn = scsi_init_rq;
+ q->exit_rq_fn = scsi_exit_rq;
+
+ if (blk_init_allocated_queue(q) < 0) {
+ blk_cleanup_queue(q);
+ return NULL;
+ }
+ __scsi_init_queue(shost, q);
blk_queue_prep_rq(q, scsi_prep_fn);
blk_queue_unprep_rq(q, scsi_unprep_fn);
blk_queue_softirq_done(q, scsi_softirq_done);
@@ -2209,6 +2295,8 @@ int __init scsi_init_queue(void)
void scsi_exit_queue(void)
{
+ kmem_cache_destroy(scsi_sense_cache);
+ kmem_cache_destroy(scsi_sense_isadma_cache);
kmem_cache_destroy(scsi_sdb_cache);
}