summaryrefslogtreecommitdiff
path: root/drivers/scsi/storvsc_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/storvsc_drv.c')
-rw-r--r--drivers/scsi/storvsc_drv.c182
1 files changed, 100 insertions, 82 deletions
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 7f12d931fe7c..6e4112143c76 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -149,6 +149,8 @@ struct hv_fc_wwn_packet {
*/
static int vmstor_proto_version;
+static bool hv_dev_is_fc(struct hv_device *hv_dev);
+
#define STORVSC_LOGGING_NONE 0
#define STORVSC_LOGGING_ERROR 1
#define STORVSC_LOGGING_WARN 2
@@ -169,6 +171,12 @@ do { \
dev_warn(&(dev)->device, fmt, ##__VA_ARGS__); \
} while (0)
+#define storvsc_log_ratelimited(dev, level, fmt, ...) \
+do { \
+ if (do_logging(level)) \
+ dev_warn_ratelimited(&(dev)->device, fmt, ##__VA_ARGS__); \
+} while (0)
+
struct vmscsi_request {
u16 length;
u8 srb_status;
@@ -316,6 +324,9 @@ enum storvsc_request_type {
#define SRB_STATUS_ABORTED 0x02
#define SRB_STATUS_ERROR 0x04
#define SRB_STATUS_INVALID_REQUEST 0x06
+#define SRB_STATUS_TIMEOUT 0x09
+#define SRB_STATUS_SELECTION_TIMEOUT 0x0A
+#define SRB_STATUS_BUS_RESET 0x0E
#define SRB_STATUS_DATA_OVERRUN 0x12
#define SRB_STATUS_INVALID_LUN 0x20
#define SRB_STATUS_INTERNAL_ERROR 0x30
@@ -327,6 +338,7 @@ enum storvsc_request_type {
*/
static int storvsc_ringbuffer_size = (128 * 1024);
+static int aligned_ringbuffer_size;
static u32 max_outstanding_req_per_channel;
static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);
@@ -350,7 +362,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowater,
/*
* Timeout in seconds for all devices managed by this driver.
*/
-static int storvsc_timeout = 180;
+static const int storvsc_timeout = 180;
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
static struct scsi_transport_template *fc_transport_template;
@@ -366,6 +378,7 @@ static void storvsc_on_channel_callback(void *context);
#define STORVSC_FC_MAX_LUNS_PER_TARGET 255
#define STORVSC_FC_MAX_TARGETS 128
#define STORVSC_FC_MAX_CHANNELS 8
+#define STORVSC_FC_MAX_XFER_SIZE ((u32)(512 * 1024))
#define STORVSC_IDE_MAX_LUNS_PER_TARGET 64
#define STORVSC_IDE_MAX_TARGETS 1
@@ -471,7 +484,7 @@ static void storvsc_device_scan(struct work_struct *work)
sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
if (!sdev)
goto done;
- scsi_rescan_device(&sdev->sdev_gendev);
+ scsi_rescan_device(sdev);
scsi_device_put(sdev);
done:
@@ -683,8 +696,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
new_sc->next_request_id_callback = storvsc_next_request_id;
ret = vmbus_open(new_sc,
- storvsc_ringbuffer_size,
- storvsc_ringbuffer_size,
+ aligned_ringbuffer_size,
+ aligned_ringbuffer_size,
(void *)&props,
sizeof(struct vmstorage_channel_properties),
storvsc_on_channel_callback, new_sc);
@@ -755,7 +768,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
return;
}
- t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
+ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ);
if (t == 0) {
dev_err(dev, "Failed to create sub-channel: timed out\n");
return;
@@ -763,7 +776,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
vstor_packet->status != 0) {
- dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
+ dev_err(dev, "Failed to create sub-channel: op=%d, host=0x%x\n",
vstor_packet->operation, vstor_packet->status);
return;
}
@@ -820,7 +833,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device,
if (ret != 0)
return ret;
- t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ);
if (t == 0)
return -ETIMEDOUT;
@@ -910,14 +923,13 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
/*
* Allocate state to manage the sub-channels.
- * We allocate an array based on the numbers of possible CPUs
- * (Hyper-V does not support cpu online/offline).
- * This Array will be sparseley populated with unique
- * channels - primary + sub-channels.
- * We will however populate all the slots to evenly distribute
- * the load.
+ * We allocate an array based on the number of CPU ids. This array
+ * is initially sparsely populated for the CPUs assigned to channels:
+ * primary + sub-channels. As I/Os are initiated by different CPUs,
+ * the slots for all online CPUs are populated to evenly distribute
+ * the load across all channels.
*/
- stor_device->stor_chns = kcalloc(num_possible_cpus(), sizeof(void *),
+ stor_device->stor_chns = kcalloc(nr_cpu_ids, sizeof(void *),
GFP_KERNEL);
if (stor_device->stor_chns == NULL)
return -ENOMEM;
@@ -980,6 +992,10 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
case SRB_STATUS_ABORTED:
case SRB_STATUS_INVALID_REQUEST:
case SRB_STATUS_INTERNAL_ERROR:
+ case SRB_STATUS_TIMEOUT:
+ case SRB_STATUS_SELECTION_TIMEOUT:
+ case SRB_STATUS_BUS_RESET:
+ case SRB_STATUS_DATA_OVERRUN:
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
/* Check for capacity change */
if ((asc == 0x2a) && (ascq == 0x9)) {
@@ -1129,6 +1145,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device,
* not correctly handle:
* INQUIRY command with page code parameter set to 0x80
* MODE_SENSE command with cmd[2] == 0x1c
+ * MAINTENANCE_IN is not supported by HyperV FC passthrough
*
* Setup srb and scsi status so this won't be fatal.
* We do this so we can distinguish truly fatal failues
@@ -1136,7 +1153,9 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device,
*/
if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) ||
- (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) {
+ (stor_pkt->vm_srb.cdb[0] == MODE_SENSE) ||
+ (stor_pkt->vm_srb.cdb[0] == MAINTENANCE_IN &&
+ hv_dev_is_fc(device))) {
vstor_packet->vm_srb.scsi_status = 0;
vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS;
}
@@ -1163,8 +1182,8 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device,
int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ?
STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR;
- storvsc_log(device, loglevel,
- "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n",
+ storvsc_log_ratelimited(device, loglevel,
+ "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x host 0x%x\n",
scsi_cmd_to_rq(request->cmd)->tag,
stor_pkt->vm_srb.cdb[0],
vstor_packet->vm_srb.scsi_status,
@@ -1331,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
return ret;
ret = storvsc_channel_init(device, is_fc);
+ if (ret)
+ vmbus_close(device->channel);
return ret;
}
@@ -1385,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
}
/*
- * Our channel array is sparsley populated and we
+ * Our channel array could be sparsley populated and we
* initiated I/O on a processor/hw-q that does not
* currently have a designated channel. Fix this.
* The strategy is simple:
- * I. Ensure NUMA locality
- * II. Distribute evenly (best effort)
+ * I. Prefer the channel associated with the current CPU
+ * II. Ensure NUMA locality
+ * III. Distribute evenly (best effort)
*/
+ /* Prefer the channel on the I/O issuing processor/hw-q */
+ if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus))
+ return stor_device->stor_chns[q_num];
+
node_mask = cpumask_of_node(cpu_to_node(q_num));
num_channels = 0;
@@ -1448,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
/* See storvsc_change_target_cpu(). */
outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
if (outgoing_channel != NULL) {
- if (outgoing_channel->target_cpu == q_num) {
- /*
- * Ideally, we want to pick a different channel if
- * available on the same NUMA node.
- */
- node_mask = cpumask_of_node(cpu_to_node(q_num));
- for_each_cpu_wrap(tgt_cpu,
- &stor_device->alloced_cpus, q_num + 1) {
- if (!cpumask_test_cpu(tgt_cpu, node_mask))
- continue;
- if (tgt_cpu == q_num)
- continue;
- channel = READ_ONCE(
- stor_device->stor_chns[tgt_cpu]);
- if (channel == NULL)
- continue;
- if (hv_get_avail_to_write_percent(
- &channel->outbound)
- > ring_avail_percent_lowater) {
- outgoing_channel = channel;
- goto found_channel;
- }
- }
+ if (hv_get_avail_to_write_percent(&outgoing_channel->outbound)
+ > ring_avail_percent_lowater)
+ goto found_channel;
- /*
- * All the other channels on the same NUMA node are
- * busy. Try to use the channel on the current CPU
- */
- if (hv_get_avail_to_write_percent(
- &outgoing_channel->outbound)
- > ring_avail_percent_lowater)
+ /*
+ * Channel is busy, try to find a channel on the same NUMA node
+ */
+ node_mask = cpumask_of_node(cpu_to_node(q_num));
+ for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
+ q_num + 1) {
+ if (!cpumask_test_cpu(tgt_cpu, node_mask))
+ continue;
+ channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
+ if (!channel)
+ continue;
+ if (hv_get_avail_to_write_percent(&channel->outbound)
+ > ring_avail_percent_lowater) {
+ outgoing_channel = channel;
goto found_channel;
+ }
+ }
- /*
- * If we reach here, all the channels on the current
- * NUMA node are busy. Try to find a channel in
- * other NUMA nodes
- */
- for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
- if (cpumask_test_cpu(tgt_cpu, node_mask))
- continue;
- channel = READ_ONCE(
- stor_device->stor_chns[tgt_cpu]);
- if (channel == NULL)
- continue;
- if (hv_get_avail_to_write_percent(
- &channel->outbound)
- > ring_avail_percent_lowater) {
- outgoing_channel = channel;
- goto found_channel;
- }
+ /*
+ * If we reach here, all the channels on the current
+ * NUMA node are busy. Try to find a channel in
+ * all NUMA nodes
+ */
+ for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
+ q_num + 1) {
+ channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
+ if (!channel)
+ continue;
+ if (hv_get_avail_to_write_percent(&channel->outbound)
+ > ring_avail_percent_lowater) {
+ outgoing_channel = channel;
+ goto found_channel;
}
}
+ /*
+ * If we reach here, all the channels are busy. Use the
+ * original channel found.
+ */
} else {
spin_lock_irqsave(&stor_device->lock, flags);
outgoing_channel = stor_device->stor_chns[q_num];
@@ -1565,7 +1580,8 @@ static int storvsc_device_alloc(struct scsi_device *sdevice)
return 0;
}
-static int storvsc_device_configure(struct scsi_device *sdevice)
+static int storvsc_sdev_configure(struct scsi_device *sdevice,
+ struct queue_limits *lim)
{
blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
@@ -1593,7 +1609,7 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
return 0;
}
-static int storvsc_get_chs(struct scsi_device *sdev, struct block_device * bdev,
+static int storvsc_get_chs(struct scsi_device *sdev, struct gendisk *unused,
sector_t capacity, int *info)
{
sector_t nsect = capacity;
@@ -1648,7 +1664,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
if (ret != 0)
return FAILED;
- t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ);
if (t == 0)
return TIMEOUT_ERROR;
@@ -1673,10 +1689,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
*/
static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
{
-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
- if (scmnd->device->host->transportt == fc_transport_template)
- return fc_eh_timed_out(scmnd);
-#endif
return SCSI_EH_RESET_TIMER;
}
@@ -1784,6 +1796,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
length = scsi_bufflen(scmnd);
payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
+ payload->range.len = 0;
payload_sz = 0;
if (scsi_sg_count(scmnd)) {
@@ -1802,6 +1815,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
return SCSI_MLQUEUE_DEVICE_BUSY;
}
+ payload->rangecount = 1;
payload->range.len = length;
payload->range.offset = offset_in_hvpg;
@@ -1870,8 +1884,8 @@ static struct scsi_host_template scsi_driver = {
.eh_host_reset_handler = storvsc_host_reset_handler,
.proc_name = "storvsc_host",
.eh_timed_out = storvsc_eh_timed_out,
- .slave_alloc = storvsc_device_alloc,
- .slave_configure = storvsc_device_configure,
+ .sdev_init = storvsc_device_alloc,
+ .sdev_configure = storvsc_sdev_configure,
.cmd_per_lun = 2048,
.this_id = -1,
/* Ensure there are no gaps in presented sgls */
@@ -1921,8 +1935,8 @@ static int storvsc_probe(struct hv_device *device,
int num_present_cpus = num_present_cpus();
struct Scsi_Host *host;
struct hv_host_device *host_dev;
- bool dev_is_ide = ((dev_id->driver_data == IDE_GUID) ? true : false);
- bool is_fc = ((dev_id->driver_data == SFC_GUID) ? true : false);
+ bool dev_is_ide = dev_id->driver_data == IDE_GUID;
+ bool is_fc = dev_id->driver_data == SFC_GUID;
int target = 0;
struct storvsc_device *stor_device;
int max_sub_channels = 0;
@@ -1969,7 +1983,7 @@ static int storvsc_probe(struct hv_device *device,
dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1);
stor_device->port_number = host->host_no;
- ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
+ ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc);
if (ret)
goto err_out1;
@@ -2006,6 +2020,9 @@ static int storvsc_probe(struct hv_device *device,
* protecting it from any weird value.
*/
max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
+ if (is_fc)
+ max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE);
+
/* max_hw_sectors_kb */
host->max_sectors = max_xfer_bytes >> 9;
/*
@@ -2157,7 +2174,7 @@ static int storvsc_resume(struct hv_device *hv_dev)
{
int ret;
- ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
+ ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size,
hv_dev_is_fc(hv_dev));
return ret;
}
@@ -2191,8 +2208,9 @@ static int __init storvsc_drv_init(void)
* the ring buffer indices) by the max request size (which is
* vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64)
*/
+ aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size);
max_outstanding_req_per_channel =
- ((storvsc_ringbuffer_size - PAGE_SIZE) /
+ ((aligned_ringbuffer_size - PAGE_SIZE) /
ALIGN(MAX_MULTIPAGE_BUFFER_PACKET +
sizeof(struct vstor_packet) + sizeof(u64),
sizeof(u64)));