diff options
Diffstat (limited to 'drivers/scsi/storvsc_drv.c')
| -rw-r--r-- | drivers/scsi/storvsc_drv.c | 943 |
1 files changed, 598 insertions, 345 deletions
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 84380bae20f1..6e4112143c76 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1,19 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2009, Microsoft Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * Authors: * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> @@ -33,6 +21,8 @@ #include <linux/device.h> #include <linux/hyperv.h> #include <linux/blkdev.h> +#include <linux/dma-mapping.h> + #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_host.h> @@ -64,13 +54,15 @@ #define VMSTOR_PROTO_VERSION(MAJOR_, MINOR_) ((((MAJOR_) & 0xff) << 8) | \ (((MINOR_) & 0xff))) - #define VMSTOR_PROTO_VERSION_WIN6 VMSTOR_PROTO_VERSION(2, 0) #define VMSTOR_PROTO_VERSION_WIN7 VMSTOR_PROTO_VERSION(4, 2) #define VMSTOR_PROTO_VERSION_WIN8 VMSTOR_PROTO_VERSION(5, 1) #define VMSTOR_PROTO_VERSION_WIN8_1 VMSTOR_PROTO_VERSION(6, 0) #define VMSTOR_PROTO_VERSION_WIN10 VMSTOR_PROTO_VERSION(6, 2) +/* channel callback timeout in ms */ +#define CALLBACK_TIMEOUT 2 + /* Packet structure describing virtual storage requests. */ enum vstor_packet_operation { VSTOR_OPERATION_COMPLETE_IO = 1, @@ -146,27 +138,19 @@ struct hv_fc_wwn_packet { */ #define STORVSC_MAX_CMD_LEN 0x10 -#define POST_WIN7_STORVSC_SENSE_BUFFER_SIZE 0x14 -#define PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE 0x12 - +/* Sense buffer size is the same for all versions since Windows 8 */ #define STORVSC_SENSE_BUFFER_SIZE 0x14 #define STORVSC_MAX_BUF_LEN_WITH_PADDING 0x14 /* - * Sense buffer size changed in win8; have a run-time - * variable to track the size we should use. This value will - * likely change during protocol negotiation but it is valid - * to start by assuming pre-Win8. - */ -static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; - -/* * The storage protocol version is determined during the * initial exchange with the host. It will indicate which * storage functionality is available in the host. */ static int vmstor_proto_version; +static bool hv_dev_is_fc(struct hv_device *hv_dev); + #define STORVSC_LOGGING_NONE 0 #define STORVSC_LOGGING_ERROR 1 #define STORVSC_LOGGING_WARN 2 @@ -187,17 +171,11 @@ do { \ dev_warn(&(dev)->device, fmt, ##__VA_ARGS__); \ } while (0) -struct vmscsi_win8_extension { - /* - * The following were added in Windows 8 - */ - u16 reserve; - u8 queue_tag; - u8 queue_action; - u32 srb_flags; - u32 time_out_value; - u32 queue_sort_ey; -} __packed; +#define storvsc_log_ratelimited(dev, level, fmt, ...) \ +do { \ + if (do_logging(level)) \ + dev_warn_ratelimited(&(dev)->device, fmt, ##__VA_ARGS__); \ +} while (0) struct vmscsi_request { u16 length; @@ -224,58 +202,23 @@ struct vmscsi_request { /* * The following was added in win8. */ - struct vmscsi_win8_extension win8_extension; + u16 reserve; + u8 queue_tag; + u8 queue_action; + u32 srb_flags; + u32 time_out_value; + u32 queue_sort_ey; } __attribute((packed)); - /* - * The size of the vmscsi_request has changed in win8. The - * additional size is because of new elements added to the - * structure. These elements are valid only when we are talking - * to a win8 host. - * Track the correction to size we need to apply. This value - * will likely change during protocol negotiation but it is - * valid to start by assuming pre-Win8. + * The list of windows version in order of preference. */ -static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); - -/* - * The list of storage protocols in order of preference. - */ -struct vmstor_protocol { - int protocol_version; - int sense_buffer_size; - int vmscsi_size_delta; -}; - -static const struct vmstor_protocol vmstor_protocols[] = { - { +static const int protocol_version[] = { VMSTOR_PROTO_VERSION_WIN10, - POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, - 0 - }, - { VMSTOR_PROTO_VERSION_WIN8_1, - POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, - 0 - }, - { VMSTOR_PROTO_VERSION_WIN8, - POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, - 0 - }, - { - VMSTOR_PROTO_VERSION_WIN7, - PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, - sizeof(struct vmscsi_win8_extension), - }, - { - VMSTOR_PROTO_VERSION_WIN6, - PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, - sizeof(struct vmscsi_win8_extension), - } }; @@ -368,16 +311,25 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. */ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_TIMEOUT 0x09 +#define SRB_STATUS_SELECTION_TIMEOUT 0x0A +#define SRB_STATUS_BUS_RESET 0x0E +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 +#define SRB_STATUS_INTERNAL_ERROR 0x30 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -385,14 +337,20 @@ enum storvsc_request_type { * This is the end of Protocol specific defines. */ -static int storvsc_ringbuffer_size = (256 * PAGE_SIZE); +static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; +static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); static int storvsc_vcpus_per_sub_channel = 4; +static unsigned int storvsc_max_hw_queues; module_param(storvsc_ringbuffer_size, int, S_IRUGO); MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)"); +module_param(storvsc_max_hw_queues, uint, 0644); +MODULE_PARM_DESC(storvsc_max_hw_queues, "Maximum number of hardware queues"); + module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO); MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels"); @@ -404,12 +362,13 @@ MODULE_PARM_DESC(ring_avail_percent_lowater, /* * Timeout in seconds for all devices managed by this driver. */ -static int storvsc_timeout = 180; +static const int storvsc_timeout = 180; #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; #endif +static struct scsi_host_template scsi_driver; static void storvsc_on_channel_callback(void *context); #define STORVSC_MAX_LUNS_PER_TARGET 255 @@ -419,11 +378,18 @@ static void storvsc_on_channel_callback(void *context); #define STORVSC_FC_MAX_LUNS_PER_TARGET 255 #define STORVSC_FC_MAX_TARGETS 128 #define STORVSC_FC_MAX_CHANNELS 8 +#define STORVSC_FC_MAX_XFER_SIZE ((u32)(512 * 1024)) #define STORVSC_IDE_MAX_LUNS_PER_TARGET 64 #define STORVSC_IDE_MAX_TARGETS 1 #define STORVSC_IDE_MAX_CHANNELS 1 +/* + * Upper bound on the size of a storvsc packet. + */ +#define STORVSC_MAX_PKT_SIZE (sizeof(struct vmpacket_descriptor) +\ + sizeof(struct vstor_packet)) + struct storvsc_cmd_request { struct scsi_cmnd *cmd; @@ -473,6 +439,11 @@ struct storvsc_device { * Mask of CPUs bound to subchannels. */ struct cpumask alloced_cpus; + /* + * Serializes modifications of stor_chns[] from storvsc_do_io() + * and storvsc_change_target_cpu(). + */ + spinlock_t lock; /* Used for vsc/vsp channel reset process */ struct storvsc_cmd_request init_request; struct storvsc_cmd_request reset_request; @@ -513,7 +484,7 @@ static void storvsc_device_scan(struct work_struct *work) sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun); if (!sdev) goto done; - scsi_rescan_device(&sdev->sdev_gendev); + scsi_rescan_device(sdev); scsi_device_put(sdev); done: @@ -530,7 +501,7 @@ static void storvsc_host_scan(struct work_struct *work) host = host_device->host; /* * Before scanning the host, first check to see if any of the - * currrently known devices have been hot removed. We issue a + * currently known devices have been hot removed. We issue a * "unit ready" command against all currently known devices. * This I/O will result in an error for devices that have been * removed. As part of handling the I/O error, we remove the device. @@ -632,6 +603,81 @@ get_in_err: } +static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old, + u32 new) +{ + struct storvsc_device *stor_device; + struct vmbus_channel *cur_chn; + bool old_is_alloced = false; + struct hv_device *device; + unsigned long flags; + int cpu; + + device = channel->primary_channel ? + channel->primary_channel->device_obj + : channel->device_obj; + stor_device = get_out_stor_device(device); + if (!stor_device) + return; + + /* See storvsc_do_io() -> get_og_chn(). */ + spin_lock_irqsave(&stor_device->lock, flags); + + /* + * Determines if the storvsc device has other channels assigned to + * the "old" CPU to update the alloced_cpus mask and the stor_chns + * array. + */ + if (device->channel != channel && device->channel->target_cpu == old) { + cur_chn = device->channel; + old_is_alloced = true; + goto old_is_alloced; + } + list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) { + if (cur_chn == channel) + continue; + if (cur_chn->target_cpu == old) { + old_is_alloced = true; + goto old_is_alloced; + } + } + +old_is_alloced: + if (old_is_alloced) + WRITE_ONCE(stor_device->stor_chns[old], cur_chn); + else + cpumask_clear_cpu(old, &stor_device->alloced_cpus); + + /* "Flush" the stor_chns array. */ + for_each_possible_cpu(cpu) { + if (stor_device->stor_chns[cpu] && !cpumask_test_cpu( + cpu, &stor_device->alloced_cpus)) + WRITE_ONCE(stor_device->stor_chns[cpu], NULL); + } + + WRITE_ONCE(stor_device->stor_chns[new], channel); + cpumask_set_cpu(new, &stor_device->alloced_cpus); + + spin_unlock_irqrestore(&stor_device->lock, flags); +} + +static u64 storvsc_next_request_id(struct vmbus_channel *channel, u64 rqst_addr) +{ + struct storvsc_cmd_request *request = + (struct storvsc_cmd_request *)(unsigned long)rqst_addr; + + if (rqst_addr == VMBUS_RQST_INIT) + return VMBUS_RQST_INIT; + if (rqst_addr == VMBUS_RQST_RESET) + return VMBUS_RQST_RESET; + + /* + * Cannot return an ID of 0, which is reserved for an unsolicited + * message from Hyper-V. + */ + return (u64)blk_mq_unique_tag(scsi_cmd_to_rq(request->cmd)) + 1; +} + static void handle_sc_creation(struct vmbus_channel *new_sc) { struct hv_device *device = new_sc->primary_channel->device_obj; @@ -645,10 +691,13 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) return; memset(&props, 0, sizeof(struct vmstorage_channel_properties)); + new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE; + + new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -659,6 +708,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) return; } + new_sc->change_target_cpu_callback = storvsc_change_target_cpu; + /* Add the sub-channel to the array of available channels. */ stor_device->stor_chns[new_sc->target_cpu] = new_sc; cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus); @@ -668,13 +719,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) { struct device *dev = &device->device; struct storvsc_device *stor_device; - int num_cpus = num_online_cpus(); int num_sc; struct storvsc_cmd_request *request; struct vstor_packet *vstor_packet; int ret, t; - num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns); + /* + * If the number of CPUs is artificially restricted, such as + * with maxcpus=1 on the kernel boot line, Hyper-V could offer + * sub-channels >= the number of CPUs. These sub-channels + * should not be created. The primary channel is already created + * and assigned to one CPU, so check against # CPUs - 1. + */ + num_sc = min((int)(num_online_cpus() - 1), max_chns); + if (!num_sc) + return; + stor_device = get_out_stor_device(device); if (!stor_device) return; @@ -698,9 +758,8 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) vstor_packet->sub_channel_count = num_sc; ret = vmbus_sendpacket(device->channel, vstor_packet, - (sizeof(struct vstor_packet) - - vmscsi_size_delta), - (unsigned long)request, + sizeof(struct vstor_packet), + VMBUS_RQST_INIT, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); @@ -709,7 +768,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) return; } - t = wait_for_completion_timeout(&request->wait_event, 10*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) { dev_err(dev, "Failed to create sub-channel: timed out\n"); return; @@ -717,7 +776,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO || vstor_packet->status != 0) { - dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n", + dev_err(dev, "Failed to create sub-channel: op=%d, host=0x%x\n", vstor_packet->operation, vstor_packet->status); return; } @@ -753,24 +812,28 @@ static int storvsc_execute_vstor_op(struct hv_device *device, struct storvsc_cmd_request *request, bool status_check) { + struct storvsc_device *stor_device; struct vstor_packet *vstor_packet; int ret, t; + stor_device = get_out_stor_device(device); + if (!stor_device) + return -ENODEV; + vstor_packet = &request->vstor_packet; init_completion(&request->wait_event); vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_sendpacket(device->channel, vstor_packet, - (sizeof(struct vstor_packet) - - vmscsi_size_delta), - (unsigned long)request, + sizeof(struct vstor_packet), + VMBUS_RQST_INIT, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) return ret; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return -ETIMEDOUT; @@ -813,14 +876,13 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc) * Query host supported protocol version. */ - for (i = 0; i < ARRAY_SIZE(vmstor_protocols); i++) { + for (i = 0; i < ARRAY_SIZE(protocol_version); i++) { /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERY_PROTOCOL_VERSION; - vstor_packet->version.major_minor = - vmstor_protocols[i].protocol_version; + vstor_packet->version.major_minor = protocol_version[i]; /* * The revision number is only used in Windows; set it to 0. @@ -834,21 +896,16 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc) return -EINVAL; if (vstor_packet->status == 0) { - vmstor_proto_version = - vmstor_protocols[i].protocol_version; - - sense_buffer_size = - vmstor_protocols[i].sense_buffer_size; - - vmscsi_size_delta = - vmstor_protocols[i].vmscsi_size_delta; + vmstor_proto_version = protocol_version[i]; break; } } - if (vstor_packet->status != 0) + if (vstor_packet->status != 0) { + dev_err(&device->device, "Obsolete Hyper-V version\n"); return -EINVAL; + } memset(vstor_packet, 0, sizeof(struct vstor_packet)); @@ -866,27 +923,27 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc) /* * Allocate state to manage the sub-channels. - * We allocate an array based on the numbers of possible CPUs - * (Hyper-V does not support cpu online/offline). - * This Array will be sparseley populated with unique - * channels - primary + sub-channels. - * We will however populate all the slots to evenly distribute - * the load. + * We allocate an array based on the number of CPU ids. This array + * is initially sparsely populated for the CPUs assigned to channels: + * primary + sub-channels. As I/Os are initiated by different CPUs, + * the slots for all online CPUs are populated to evenly distribute + * the load across all channels. */ - stor_device->stor_chns = kcalloc(num_possible_cpus(), sizeof(void *), + stor_device->stor_chns = kcalloc(nr_cpu_ids, sizeof(void *), GFP_KERNEL); if (stor_device->stor_chns == NULL) return -ENOMEM; + device->channel->change_target_cpu_callback = storvsc_change_target_cpu; + stor_device->stor_chns[device->channel->target_cpu] = device->channel; cpumask_set_cpu(device->channel->target_cpu, &stor_device->alloced_cpus); - if (vmstor_proto_version >= VMSTOR_PROTO_VERSION_WIN8) { - if (vstor_packet->storage_channel_properties.flags & - STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL) - process_sub_channels = true; - } + if (vstor_packet->storage_channel_properties.flags & + STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL) + process_sub_channels = true; + stor_device->max_transfer_bytes = vstor_packet->storage_channel_properties.max_transfer_bytes; @@ -929,17 +986,48 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, struct storvsc_scan_work *wrk; void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - bool do_work = false; switch (SRB_STATUS(vm_srb->srb_status)) { case SRB_STATUS_ERROR: - /* - * Let upper layer deal with error when - * sense message is present. - */ + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + case SRB_STATUS_INTERNAL_ERROR: + case SRB_STATUS_TIMEOUT: + case SRB_STATUS_SELECTION_TIMEOUT: + case SRB_STATUS_BUS_RESET: + case SRB_STATUS_DATA_OVERRUN: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } + + /* + * Check for "Operating parameters have changed" + * due to Hyper-V changing the VHD/VHDX BlockSize + * when adding/removing a differencing disk. This + * causes discard_granularity to change, so do a + * rescan to pick up the new granularity. We don't + * want scsi_report_sense() to output a message + * that a sysadmin wouldn't know what to do with. + */ + if ((asc == 0x3f) && (ascq != 0x03) && + (ascq != 0x0e)) { + process_err_fn = storvsc_device_scan; + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } + + /* + * Otherwise, let upper layer deal with the + * error when sense message is present + */ + return; + } - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) - break; /* * If there is an error; offline the device since all * error recovery strategies would have already been @@ -952,43 +1040,32 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, set_host_byte(scmnd, DID_PASSTHROUGH); break; /* - * On Some Windows hosts TEST_UNIT_READY command can return - * SRB_STATUS_ERROR, let the upper level code deal with it - * based on the sense information. + * On some Hyper-V hosts TEST_UNIT_READY command can + * return SRB_STATUS_ERROR. Let the upper level code + * deal with it based on the sense information. */ case TEST_UNIT_READY: break; default: set_host_byte(scmnd, DID_ERROR); } - break; + return; + case SRB_STATUS_INVALID_LUN: set_host_byte(scmnd, DID_NO_CONNECT); - do_work = true; process_err_fn = storvsc_remove_lun; - break; - case SRB_STATUS_ABORTED: - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - (asc == 0x2a) && (ascq == 0x9)) { - do_work = true; - process_err_fn = storvsc_device_scan; - /* - * Retry the I/O that trigerred this. - */ - set_host_byte(scmnd, DID_REQUEUE); - } - break; - } + goto do_work; - if (!do_work) - return; + } + return; +do_work: /* * We need to schedule work to process this error; schedule it. */ wrk = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC); if (!wrk) { - set_host_byte(scmnd, DID_TARGET_FAILURE); + set_host_byte(scmnd, DID_BAD_TARGET); return; } @@ -1010,6 +1087,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, struct Scsi_Host *host; u32 payload_sz = cmd_request->payload_sz; void *payload = cmd_request->payload; + bool sense_ok; host = stor_dev->host; @@ -1019,11 +1097,10 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, scmnd->result = vm_srb->scsi_status; if (scmnd->result) { - if (scsi_normalize_sense(scmnd->sense_buffer, - SCSI_SENSE_BUFFERSIZE, &sense_hdr) && - !(sense_hdr.sense_key == NOT_READY && - sense_hdr.asc == 0x03A) && - do_logging(STORVSC_LOGGING_ERROR)) + sense_ok = scsi_normalize_sense(scmnd->sense_buffer, + SCSI_SENSE_BUFFERSIZE, &sense_hdr); + + if (sense_ok && do_logging(STORVSC_LOGGING_WARN)) scsi_print_sense_hdr(scmnd->device, "storvsc", &sense_hdr); } @@ -1040,10 +1117,14 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, data_transfer_length = 0; } + /* Validate data_transfer_length (from Hyper-V) */ + if (data_transfer_length > cmd_request->payload->range.len) + data_transfer_length = cmd_request->payload->range.len; + scsi_set_resid(scmnd, cmd_request->payload->range.len - data_transfer_length); - scmnd->scsi_done(scmnd); + scsi_done(scmnd); if (payload_sz > sizeof(struct vmbus_channel_packet_multipage_buffer)) @@ -1064,6 +1145,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, * not correctly handle: * INQUIRY command with page code parameter set to 0x80 * MODE_SENSE command with cmd[2] == 0x1c + * MAINTENANCE_IN is not supported by HyperV FC passthrough * * Setup srb and scsi status so this won't be fatal. * We do this so we can distinguish truly fatal failues @@ -1071,53 +1153,58 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, */ if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) || - (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) { + (stor_pkt->vm_srb.cdb[0] == MODE_SENSE) || + (stor_pkt->vm_srb.cdb[0] == MAINTENANCE_IN && + hv_dev_is_fc(device))) { vstor_packet->vm_srb.scsi_status = 0; vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS; } - /* Copy over the status...etc */ stor_pkt->vm_srb.scsi_status = vstor_packet->vm_srb.scsi_status; stor_pkt->vm_srb.srb_status = vstor_packet->vm_srb.srb_status; - stor_pkt->vm_srb.sense_info_length = - vstor_packet->vm_srb.sense_info_length; - - if (vstor_packet->vm_srb.scsi_status != 0 || - vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) - storvsc_log(device, STORVSC_LOGGING_WARN, - "cmd 0x%x scsi status 0x%x srb status 0x%x\n", - stor_pkt->vm_srb.cdb[0], - vstor_packet->vm_srb.scsi_status, - vstor_packet->vm_srb.srb_status); - if ((vstor_packet->vm_srb.scsi_status & 0xFF) == 0x02) { - /* CHECK_CONDITION */ - if (vstor_packet->vm_srb.srb_status & - SRB_STATUS_AUTOSENSE_VALID) { - /* autosense data available */ + /* + * Copy over the sense_info_length, but limit to the known max + * size if Hyper-V returns a bad value. + */ + stor_pkt->vm_srb.sense_info_length = min_t(u8, STORVSC_SENSE_BUFFER_SIZE, + vstor_packet->vm_srb.sense_info_length); - storvsc_log(device, STORVSC_LOGGING_WARN, - "stor pkt %p autosense data valid - len %d\n", - request, vstor_packet->vm_srb.sense_info_length); + if (vstor_packet->vm_srb.scsi_status != 0 || + vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) { - memcpy(request->cmd->sense_buffer, - vstor_packet->vm_srb.sense_data, - vstor_packet->vm_srb.sense_info_length); + /* + * Log TEST_UNIT_READY errors only as warnings. Hyper-V can + * return errors when detecting devices using TEST_UNIT_READY, + * and logging these as errors produces unhelpful noise. + */ + int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ? + STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR; - } + storvsc_log_ratelimited(device, loglevel, + "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x host 0x%x\n", + scsi_cmd_to_rq(request->cmd)->tag, + stor_pkt->vm_srb.cdb[0], + vstor_packet->vm_srb.scsi_status, + vstor_packet->vm_srb.srb_status, + vstor_packet->status); } + if (vstor_packet->vm_srb.scsi_status == SAM_STAT_CHECK_CONDITION && + (vstor_packet->vm_srb.srb_status & SRB_STATUS_AUTOSENSE_VALID)) + memcpy(request->cmd->sense_buffer, + vstor_packet->vm_srb.sense_data, + stor_pkt->vm_srb.sense_info_length); + stor_pkt->vm_srb.data_transfer_length = - vstor_packet->vm_srb.data_transfer_length; + vstor_packet->vm_srb.data_transfer_length; storvsc_command_completion(request, stor_device); if (atomic_dec_and_test(&stor_device->num_outstanding_req) && stor_device->drain_notify) wake_up(&stor_device->waiting_to_drain); - - } static void storvsc_on_receive(struct storvsc_device *stor_device, @@ -1155,6 +1242,8 @@ static void storvsc_on_channel_callback(void *context) const struct vmpacket_descriptor *desc; struct hv_device *device; struct storvsc_device *stor_device; + struct Scsi_Host *shost; + unsigned long time_limit = jiffies + msecs_to_jiffies(CALLBACK_TIMEOUT); if (channel->primary_channel != NULL) device = channel->primary_channel->device_obj; @@ -1165,21 +1254,77 @@ static void storvsc_on_channel_callback(void *context) if (!stor_device) return; + shost = stor_device->host; + foreach_vmbus_pkt(desc, channel) { - void *packet = hv_pkt_data(desc); - struct storvsc_cmd_request *request; + struct vstor_packet *packet = hv_pkt_data(desc); + struct storvsc_cmd_request *request = NULL; + u32 pktlen = hv_pkt_datalen(desc); + u64 rqst_id = desc->trans_id; + u32 minlen = rqst_id ? sizeof(struct vstor_packet) : + sizeof(enum vstor_packet_operation); + + if (unlikely(time_after(jiffies, time_limit))) { + hv_pkt_iter_close(channel); + return; + } - request = (struct storvsc_cmd_request *) - ((unsigned long)desc->trans_id); + if (pktlen < minlen) { + dev_err(&device->device, + "Invalid pkt: id=%llu, len=%u, minlen=%u\n", + rqst_id, pktlen, minlen); + continue; + } - if (request == &stor_device->init_request || - request == &stor_device->reset_request) { - memcpy(&request->vstor_packet, packet, - (sizeof(struct vstor_packet) - vmscsi_size_delta)); - complete(&request->wait_event); + if (rqst_id == VMBUS_RQST_INIT) { + request = &stor_device->init_request; + } else if (rqst_id == VMBUS_RQST_RESET) { + request = &stor_device->reset_request; } else { + /* Hyper-V can send an unsolicited message with ID of 0 */ + if (rqst_id == 0) { + /* + * storvsc_on_receive() looks at the vstor_packet in the message + * from the ring buffer. + * + * - If the operation in the vstor_packet is COMPLETE_IO, then + * we call storvsc_on_io_completion(), and dereference the + * guest memory address. Make sure we don't call + * storvsc_on_io_completion() with a guest memory address + * that is zero if Hyper-V were to construct and send such + * a bogus packet. + * + * - If the operation in the vstor_packet is FCHBA_DATA, then + * we call cache_wwn(), and access the data payload area of + * the packet (wwn_packet); however, there is no guarantee + * that the packet is big enough to contain such area. + * Future-proof the code by rejecting such a bogus packet. + */ + if (packet->operation == VSTOR_OPERATION_COMPLETE_IO || + packet->operation == VSTOR_OPERATION_FCHBA_DATA) { + dev_err(&device->device, "Invalid packet with ID of 0\n"); + continue; + } + } else { + struct scsi_cmnd *scmnd; + + /* Transaction 'rqst_id' corresponds to tag 'rqst_id - 1' */ + scmnd = scsi_host_find_tag(shost, rqst_id - 1); + if (scmnd == NULL) { + dev_err(&device->device, "Incorrect transaction ID\n"); + continue; + } + request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd); + scsi_dma_unmap(scmnd); + } + storvsc_on_receive(stor_device, packet, request); + continue; } + + memcpy(&request->vstor_packet, packet, + sizeof(struct vstor_packet)); + complete(&request->wait_event); } } @@ -1191,6 +1336,9 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, memset(&props, 0, sizeof(struct vmstorage_channel_properties)); + device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE; + device->channel->next_request_id_callback = storvsc_next_request_id; + ret = vmbus_open(device->channel, ring_size, ring_size, @@ -1202,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, return ret; ret = storvsc_channel_init(device, is_fc); + if (ret) + vmbus_close(device->channel); return ret; } @@ -1250,19 +1400,25 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device, const struct cpumask *node_mask; int num_channels, tgt_cpu; - if (stor_device->num_sc == 0) + if (stor_device->num_sc == 0) { + stor_device->stor_chns[q_num] = stor_device->device->channel; return stor_device->device->channel; + } /* - * Our channel array is sparsley populated and we + * Our channel array could be sparsley populated and we * initiated I/O on a processor/hw-q that does not * currently have a designated channel. Fix this. * The strategy is simple: - * I. Ensure NUMA locality - * II. Distribute evenly (best effort) - * III. Mapping is persistent. + * I. Prefer the channel associated with the current CPU + * II. Ensure NUMA locality + * III. Distribute evenly (best effort) */ + /* Prefer the channel on the I/O issuing processor/hw-q */ + if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus)) + return stor_device->stor_chns[q_num]; + node_mask = cpumask_of_node(cpu_to_node(q_num)); num_channels = 0; @@ -1270,8 +1426,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device, if (cpumask_test_cpu(tgt_cpu, node_mask)) num_channels++; } - if (num_channels == 0) + if (num_channels == 0) { + stor_device->stor_chns[q_num] = stor_device->device->channel; return stor_device->device->channel; + } hash_qnum = q_num; while (hash_qnum >= num_channels) @@ -1297,6 +1455,7 @@ static int storvsc_do_io(struct hv_device *device, struct storvsc_device *stor_device; struct vstor_packet *vstor_packet; struct vmbus_channel *outgoing_channel, *channel; + unsigned long flags; int ret = 0; const struct cpumask *node_mask; int tgt_cpu; @@ -1310,69 +1469,71 @@ static int storvsc_do_io(struct hv_device *device, request->device = device; /* - * Select an an appropriate channel to send the request out. + * Select an appropriate channel to send the request out. */ - if (stor_device->stor_chns[q_num] != NULL) { - outgoing_channel = stor_device->stor_chns[q_num]; - if (outgoing_channel->target_cpu == q_num) { - /* - * Ideally, we want to pick a different channel if - * available on the same NUMA node. - */ - node_mask = cpumask_of_node(cpu_to_node(q_num)); - for_each_cpu_wrap(tgt_cpu, - &stor_device->alloced_cpus, q_num + 1) { - if (!cpumask_test_cpu(tgt_cpu, node_mask)) - continue; - if (tgt_cpu == q_num) - continue; - channel = stor_device->stor_chns[tgt_cpu]; - if (hv_get_avail_to_write_percent( - &channel->outbound) - > ring_avail_percent_lowater) { - outgoing_channel = channel; - goto found_channel; - } - } + /* See storvsc_change_target_cpu(). */ + outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]); + if (outgoing_channel != NULL) { + if (hv_get_avail_to_write_percent(&outgoing_channel->outbound) + > ring_avail_percent_lowater) + goto found_channel; - /* - * All the other channels on the same NUMA node are - * busy. Try to use the channel on the current CPU - */ - if (hv_get_avail_to_write_percent( - &outgoing_channel->outbound) - > ring_avail_percent_lowater) + /* + * Channel is busy, try to find a channel on the same NUMA node + */ + node_mask = cpumask_of_node(cpu_to_node(q_num)); + for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus, + q_num + 1) { + if (!cpumask_test_cpu(tgt_cpu, node_mask)) + continue; + channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]); + if (!channel) + continue; + if (hv_get_avail_to_write_percent(&channel->outbound) + > ring_avail_percent_lowater) { + outgoing_channel = channel; goto found_channel; + } + } - /* - * If we reach here, all the channels on the current - * NUMA node are busy. Try to find a channel in - * other NUMA nodes - */ - for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) { - if (cpumask_test_cpu(tgt_cpu, node_mask)) - continue; - channel = stor_device->stor_chns[tgt_cpu]; - if (hv_get_avail_to_write_percent( - &channel->outbound) - > ring_avail_percent_lowater) { - outgoing_channel = channel; - goto found_channel; - } + /* + * If we reach here, all the channels on the current + * NUMA node are busy. Try to find a channel in + * all NUMA nodes + */ + for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus, + q_num + 1) { + channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]); + if (!channel) + continue; + if (hv_get_avail_to_write_percent(&channel->outbound) + > ring_avail_percent_lowater) { + outgoing_channel = channel; + goto found_channel; } } + /* + * If we reach here, all the channels are busy. Use the + * original channel found. + */ } else { + spin_lock_irqsave(&stor_device->lock, flags); + outgoing_channel = stor_device->stor_chns[q_num]; + if (outgoing_channel != NULL) { + spin_unlock_irqrestore(&stor_device->lock, flags); + goto found_channel; + } outgoing_channel = get_og_chn(stor_device, q_num); + spin_unlock_irqrestore(&stor_device->lock, flags); } found_channel: vstor_packet->flags |= REQUEST_COMPLETION_FLAG; - vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) - - vmscsi_size_delta); + vstor_packet->vm_srb.length = sizeof(struct vmscsi_request); - vstor_packet->vm_srb.sense_info_length = sense_buffer_size; + vstor_packet->vm_srb.sense_info_length = STORVSC_SENSE_BUFFER_SIZE; vstor_packet->vm_srb.data_transfer_length = @@ -1385,13 +1546,11 @@ found_channel: ret = vmbus_sendpacket_mpb_desc(outgoing_channel, request->payload, request->payload_sz, vstor_packet, - (sizeof(struct vstor_packet) - - vmscsi_size_delta), + sizeof(struct vstor_packet), (unsigned long)request); } else { ret = vmbus_sendpacket(outgoing_channel, vstor_packet, - (sizeof(struct vstor_packet) - - vmscsi_size_delta), + sizeof(struct vstor_packet), (unsigned long)request, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); @@ -1421,13 +1580,13 @@ static int storvsc_device_alloc(struct scsi_device *sdevice) return 0; } -static int storvsc_device_configure(struct scsi_device *sdevice) +static int storvsc_sdev_configure(struct scsi_device *sdevice, + struct queue_limits *lim) { blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); - /* Ensure there are no gaps in presented sgls */ - blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1); - + /* storvsc devices don't support MAINTENANCE_IN SCSI cmd */ + sdevice->no_report_opcodes = 1; sdevice->no_write_same = 1; /* @@ -1450,7 +1609,7 @@ static int storvsc_device_configure(struct scsi_device *sdevice) return 0; } -static int storvsc_get_chs(struct scsi_device *sdev, struct block_device * bdev, +static int storvsc_get_chs(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int *info) { sector_t nsect = capacity; @@ -1483,13 +1642,13 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) struct vstor_packet *vstor_packet; int ret, t; - stor_device = get_out_stor_device(device); if (!stor_device) return FAILED; request = &stor_device->reset_request; vstor_packet = &request->vstor_packet; + memset(vstor_packet, 0, sizeof(struct vstor_packet)); init_completion(&request->wait_event); @@ -1498,15 +1657,14 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) vstor_packet->vm_srb.path_id = stor_device->path_id; ret = vmbus_sendpacket(device->channel, vstor_packet, - (sizeof(struct vstor_packet) - - vmscsi_size_delta), - (unsigned long)&stor_device->reset_request, + sizeof(struct vstor_packet), + VMBUS_RQST_RESET, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) return FAILED; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return TIMEOUT_ERROR; @@ -1529,13 +1687,9 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) * be unbounded on Azure. Reset the timer unconditionally to give the host a * chance to perform EH. */ -static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd) +static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd) { -#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) - if (scmnd->device->host->transportt == fc_transport_template) - return fc_eh_timed_out(scmnd); -#endif - return BLK_EH_RESET_TIMER; + return SCSI_EH_RESET_TIMER; } static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) @@ -1551,7 +1705,7 @@ static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) * this. So, don't send it. */ case SET_WINDOW: - scmnd->result = ILLEGAL_REQUEST << 16; + set_host_byte(scmnd, DID_ERROR); allowed = false; break; default: @@ -1566,11 +1720,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) struct hv_host_device *host_dev = shost_priv(host); struct hv_device *dev = host_dev->dev; struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd); - int i; struct scatterlist *sgl; - unsigned int sg_count = 0; struct vmscsi_request *vm_srb; - struct scatterlist *cur_sgl; struct vmbus_packet_mpb_array *payload; u32 payload_sz; u32 length; @@ -1585,7 +1736,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) * future versions of the host. */ if (!storvsc_scsi_cmd_ok(scmnd)) { - scmnd->scsi_done(scmnd); + scsi_done(scmnd); return 0; } } @@ -1593,32 +1744,33 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) /* Setup the cmd request */ cmd_request->cmd = scmnd; + memset(&cmd_request->vstor_packet, 0, sizeof(struct vstor_packet)); vm_srb = &cmd_request->vstor_packet.vm_srb; - vm_srb->win8_extension.time_out_value = 60; + vm_srb->time_out_value = 60; - vm_srb->win8_extension.srb_flags |= + vm_srb->srb_flags |= SRB_FLAGS_DISABLE_SYNCH_TRANSFER; if (scmnd->device->tagged_supported) { - vm_srb->win8_extension.srb_flags |= + vm_srb->srb_flags |= (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE); - vm_srb->win8_extension.queue_tag = SP_UNTAGGED; - vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST; + vm_srb->queue_tag = SP_UNTAGGED; + vm_srb->queue_action = SRB_SIMPLE_TAG_REQUEST; } /* Build the SRB */ switch (scmnd->sc_data_direction) { case DMA_TO_DEVICE: vm_srb->data_in = WRITE_TYPE; - vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_OUT; + vm_srb->srb_flags |= SRB_FLAGS_DATA_OUT; break; case DMA_FROM_DEVICE: vm_srb->data_in = READ_TYPE; - vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_IN; + vm_srb->srb_flags |= SRB_FLAGS_DATA_IN; break; case DMA_NONE: vm_srb->data_in = UNKNOWN_TYPE; - vm_srb->win8_extension.srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER; + vm_srb->srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER; break; default: /* @@ -1641,30 +1793,60 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) memcpy(vm_srb->cdb, scmnd->cmnd, vm_srb->cdb_length); sgl = (struct scatterlist *)scsi_sglist(scmnd); - sg_count = scsi_sg_count(scmnd); length = scsi_bufflen(scmnd); payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb; - payload_sz = sizeof(cmd_request->mpb); + payload->range.len = 0; + payload_sz = 0; - if (sg_count) { - if (sg_count > MAX_PAGE_BUFFER_COUNT) { + if (scsi_sg_count(scmnd)) { + unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset); + unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length); + struct scatterlist *sg; + unsigned long hvpfn, hvpfns_to_add; + int j, i = 0, sg_count; - payload_sz = (sg_count * sizeof(u64) + - sizeof(struct vmbus_packet_mpb_array)); + payload_sz = (hvpg_count * sizeof(u64) + + sizeof(struct vmbus_packet_mpb_array)); + + if (hvpg_count > MAX_PAGE_BUFFER_COUNT) { payload = kzalloc(payload_sz, GFP_ATOMIC); if (!payload) return SCSI_MLQUEUE_DEVICE_BUSY; } + payload->rangecount = 1; payload->range.len = length; - payload->range.offset = sgl[0].offset; + payload->range.offset = offset_in_hvpg; + + sg_count = scsi_dma_map(scmnd); + if (sg_count < 0) { + ret = SCSI_MLQUEUE_DEVICE_BUSY; + goto err_free_payload; + } + + for_each_sg(sgl, sg, sg_count, j) { + /* + * Init values for the current sgl entry. hvpfns_to_add + * is in units of Hyper-V size pages. Handling the + * PAGE_SIZE != HV_HYP_PAGE_SIZE case also handles + * values of sgl->offset that are larger than PAGE_SIZE. + * Such offsets are handled even on other than the first + * sgl entry, provided they are a multiple of PAGE_SIZE. + */ + hvpfn = HVPFN_DOWN(sg_dma_address(sg)); + hvpfns_to_add = HVPFN_UP(sg_dma_address(sg) + + sg_dma_len(sg)) - hvpfn; - cur_sgl = sgl; - for (i = 0; i < sg_count; i++) { - payload->range.pfn_array[i] = - page_to_pfn(sg_page((cur_sgl))); - cur_sgl = sg_next(cur_sgl); + /* + * Fill the next portion of the PFN array with + * sequential Hyper-V PFNs for the continguous physical + * memory described by the sgl entry. The end of the + * last sgl should be reached at the same time that + * the PFN array is filled. + */ + while (hvpfns_to_add--) + payload->range.pfn_array[i++] = hvpfn++; } } @@ -1675,14 +1857,22 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request, get_cpu()); put_cpu(); + if (ret) + scsi_dma_unmap(scmnd); + if (ret == -EAGAIN) { - if (payload_sz > sizeof(cmd_request->mpb)) - kfree(payload); /* no more space */ - return SCSI_MLQUEUE_DEVICE_BUSY; + ret = SCSI_MLQUEUE_DEVICE_BUSY; + goto err_free_payload; } return 0; + +err_free_payload: + if (payload_sz > sizeof(cmd_request->mpb)) + kfree(payload); + + return ret; } static struct scsi_host_template scsi_driver = { @@ -1694,14 +1884,15 @@ static struct scsi_host_template scsi_driver = { .eh_host_reset_handler = storvsc_host_reset_handler, .proc_name = "storvsc_host", .eh_timed_out = storvsc_eh_timed_out, - .slave_alloc = storvsc_device_alloc, - .slave_configure = storvsc_device_configure, + .sdev_init = storvsc_device_alloc, + .sdev_configure = storvsc_sdev_configure, .cmd_per_lun = 2048, .this_id = -1, - /* Make sure we dont get a sg segment crosses a page boundary */ - .dma_boundary = PAGE_SIZE-1, + /* Ensure there are no gaps in presented sgls */ + .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1, .no_write_same = 1, .track_queue_depth = 1, + .change_queue_depth = storvsc_change_queue_depth, }; enum { @@ -1729,45 +1920,36 @@ static const struct hv_vmbus_device_id id_table[] = { MODULE_DEVICE_TABLE(vmbus, id_table); +static const struct { guid_t guid; } fc_guid = { HV_SYNTHFC_GUID }; + +static bool hv_dev_is_fc(struct hv_device *hv_dev) +{ + return guid_equal(&fc_guid.guid, &hv_dev->dev_type); +} + static int storvsc_probe(struct hv_device *device, const struct hv_vmbus_device_id *dev_id) { int ret; int num_cpus = num_online_cpus(); + int num_present_cpus = num_present_cpus(); struct Scsi_Host *host; struct hv_host_device *host_dev; - bool dev_is_ide = ((dev_id->driver_data == IDE_GUID) ? true : false); - bool is_fc = ((dev_id->driver_data == SFC_GUID) ? true : false); + bool dev_is_ide = dev_id->driver_data == IDE_GUID; + bool is_fc = dev_id->driver_data == SFC_GUID; int target = 0; struct storvsc_device *stor_device; - int max_luns_per_target; - int max_targets; - int max_channels; int max_sub_channels = 0; + u32 max_xfer_bytes; /* - * Based on the windows host we are running on, - * set state to properly communicate with the host. + * We support sub-channels for storage on SCSI and FC controllers. + * The number of sub-channels offerred is based on the number of + * VCPUs in the guest. */ - - if (vmbus_proto_version < VERSION_WIN8) { - max_luns_per_target = STORVSC_IDE_MAX_LUNS_PER_TARGET; - max_targets = STORVSC_IDE_MAX_TARGETS; - max_channels = STORVSC_IDE_MAX_CHANNELS; - } else { - max_luns_per_target = STORVSC_MAX_LUNS_PER_TARGET; - max_targets = STORVSC_MAX_TARGETS; - max_channels = STORVSC_MAX_CHANNELS; - /* - * On Windows8 and above, we support sub-channels for storage - * on SCSI and FC controllers. - * The number of sub-channels offerred is based on the number of - * VCPUs in the guest. - */ - if (!dev_is_ide) - max_sub_channels = - (num_cpus - 1) / storvsc_vcpus_per_sub_channel; - } + if (!dev_is_ide) + max_sub_channels = + (num_cpus - 1) / storvsc_vcpus_per_sub_channel; scsi_driver.can_queue = max_outstanding_req_per_channel * (max_sub_channels + 1) * @@ -1796,10 +1978,12 @@ static int storvsc_probe(struct hv_device *device, init_waitqueue_head(&stor_device->waiting_to_drain); stor_device->device = device; stor_device->host = host; + spin_lock_init(&stor_device->lock); hv_set_drvdata(device, stor_device); + dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -1817,9 +2001,9 @@ static int storvsc_probe(struct hv_device *device, break; case SCSI_GUID: - host->max_lun = max_luns_per_target; - host->max_id = max_targets; - host->max_channel = max_channels - 1; + host->max_lun = STORVSC_MAX_LUNS_PER_TARGET; + host->max_id = STORVSC_MAX_TARGETS; + host->max_channel = STORVSC_MAX_CHANNELS - 1; break; default: @@ -1830,27 +2014,58 @@ static int storvsc_probe(struct hv_device *device, } /* max cmd length */ host->max_cmd_len = STORVSC_MAX_CMD_LEN; + /* + * Any reasonable Hyper-V configuration should provide + * max_transfer_bytes value aligning to HV_HYP_PAGE_SIZE, + * protecting it from any weird value. + */ + max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE); + if (is_fc) + max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE); + /* max_hw_sectors_kb */ + host->max_sectors = max_xfer_bytes >> 9; /* - * set the table size based on the info we got - * from the host. + * There are 2 requirements for Hyper-V storvsc sgl segments, + * based on which the below calculation for max segments is + * done: + * + * 1. Except for the first and last sgl segment, all sgl segments + * should be align to HV_HYP_PAGE_SIZE, that also means the + * maximum number of segments in a sgl can be calculated by + * dividing the total max transfer length by HV_HYP_PAGE_SIZE. + * + * 2. Except for the first and last, each entry in the SGL must + * have an offset that is a multiple of HV_HYP_PAGE_SIZE. */ - host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); + host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1; /* + * For non-IDE disks, the host supports multiple channels. * Set the number of HW queues we are supporting. */ - if (stor_device->num_sc != 0) - host->nr_hw_queues = stor_device->num_sc + 1; + if (!dev_is_ide) { + if (storvsc_max_hw_queues > num_present_cpus) { + storvsc_max_hw_queues = 0; + storvsc_log(device, STORVSC_LOGGING_WARN, + "Resetting invalid storvsc_max_hw_queues value to default.\n"); + } + if (storvsc_max_hw_queues) + host->nr_hw_queues = storvsc_max_hw_queues; + else + host->nr_hw_queues = num_present_cpus; + } /* * Set the error handler work queue. */ host_dev->handle_error_wq = alloc_ordered_workqueue("storvsc_error_wq_%d", - WQ_MEM_RECLAIM, + 0, host->host_no); - if (!host_dev->handle_error_wq) + if (!host_dev->handle_error_wq) { + ret = -ENOMEM; goto err_out2; + } INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan); /* Register the HBA and start the scsi bus scan */ ret = scsi_add_host(host, &device->device); @@ -1892,7 +2107,7 @@ err_out3: err_out2: /* * Once we have connected with the host, we would need to - * to invoke storvsc_dev_remove() to rollback this state and + * invoke storvsc_dev_remove() to rollback this state and * this call also frees up the stor_device; hence the jump around * err_out1 label. */ @@ -1908,7 +2123,16 @@ err_out0: return ret; } -static int storvsc_remove(struct hv_device *dev) +/* Change a scsi target's queue depth */ +static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth) +{ + if (queue_depth > scsi_driver.can_queue) + queue_depth = scsi_driver.can_queue; + + return scsi_change_queue_depth(sdev, queue_depth); +} + +static void storvsc_remove(struct hv_device *dev) { struct storvsc_device *stor_device = hv_get_drvdata(dev); struct Scsi_Host *host = stor_device->host; @@ -1924,15 +2148,44 @@ static int storvsc_remove(struct hv_device *dev) scsi_remove_host(host); storvsc_dev_remove(dev); scsi_host_put(host); +} + +static int storvsc_suspend(struct hv_device *hv_dev) +{ + struct storvsc_device *stor_device = hv_get_drvdata(hv_dev); + struct Scsi_Host *host = stor_device->host; + struct hv_host_device *host_dev = shost_priv(host); + + storvsc_wait_to_drain(stor_device); + + drain_workqueue(host_dev->handle_error_wq); + + vmbus_close(hv_dev->channel); + + kfree(stor_device->stor_chns); + stor_device->stor_chns = NULL; + + cpumask_clear(&stor_device->alloced_cpus); return 0; } +static int storvsc_resume(struct hv_device *hv_dev) +{ + int ret; + + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, + hv_dev_is_fc(hv_dev)); + return ret; +} + static struct hv_driver storvsc_drv = { .name = KBUILD_MODNAME, .id_table = id_table, .probe = storvsc_probe, .remove = storvsc_remove, + .suspend = storvsc_suspend, + .resume = storvsc_resume, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, @@ -1955,11 +2208,11 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + - sizeof(struct vstor_packet) + sizeof(u64) - - vmscsi_size_delta, + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) |
