summary | refs | log | tree | commit | diff
path: root/drivers/infiniband/ulp/srp
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-04-06 17:35:43 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-04-06 17:35:43 -0700
commit: 19fd08b85bc7e0502b55cd726f466df82ee7e777 (patch)
tree: b042de4b9a8a9478c528ea950b14d34487375695 /drivers/infiniband/ulp/srp
parent: 28da7be5ebc096ada5e6bc526c623bdd8c47800a (diff)
parent: efc365e7290d040fbd43f60b0e97653489a739d4 (diff)
Merge tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:

"Doug and I are at a conference next week so if another PR is sent I expect it to only be bug fixes. Parav noted yesterday that there are some fringe case behavior changes in his work that he would like to fix, and I see that Intel has a number of rc-looking patches for HFI1 they posted yesterday.

Parav is again the biggest contributor by patch count with his ongoing work to enable container support in the RDMA stack, followed by Leon doing syzkaller-inspired cleanups, though most of the actual fixing went to RC.

There is one uncomfortable series here fixing the user ABI to actually work as intended in 32 bit mode. There are lots of notes in the commit messages, but the basic summary is we don't think there is an actual 32 bit kernel user of drivers/infiniband for several good reasons. However we are seeing people want to use a 32 bit user space with 64 bit kernel, which didn't completely work today. So in fixing it we required a 32 bit rxe user to upgrade their userspace. rxe users are already quite rare and we think a 32 bit one is non-existent.

 - Fix RDMA uapi headers to actually compile in userspace and be more complete

 - Three shared with netdev pull requests from Mellanox:

    * 7 patches, mostly to net with 1 IB related one at the back. This series addresses an IRQ performance issue (patch 1), cleanups related to the fix for the IRQ performance problem (patches 2-6), and then extends the fragmented completion queue support that already exists in the net side of the driver to the ib side of the driver (patch 7).

    * Mostly IB, with 5 patches to net that are needed to support the remaining 10 patches to the IB subsystem. This series extends the current 'representor' framework when the mlx5 driver is in switchdev mode from being a netdev only construct to being a netdev/IB dev construct.
The IB dev is limited to raw Eth queue pairs only, but by having an IB dev of this type attached to the representor for a switchdev port, it enables DPDK to work on the switchdev device.

    * All net related, but needed as infrastructure for the rdma driver

 - Updates for the hns, i40iw, bnxt_re, cxgb3 and cxgb4 drivers

 - SRP performance updates

 - IB uverbs write path cleanup patch series from Leon

 - Add RDMA_CM support to ib_srpt. This is disabled by default. Users need to set the port for ib_srpt to listen on in configfs in order for it to be enabled (/sys/kernel/config/target/srpt/discovery_auth/rdma_cm_port)

 - TSO and Scatter FCS support in mlx4

 - Refactor of modify_qp routine to resolve problems seen while working on new code that is forthcoming

 - More refactoring and updates of RDMA CM for containers support from Parav

 - mlx5 'fine grained packet pacing', 'ipsec offload' and 'device memory' user API features

 - Infrastructure updates for the new IOCTL interface, based on increased usage

 - ABI compatibility bug fixes to fully support 32 bit userspace on 64 bit kernel as was originally intended.
See the commit messages for extensive details

 - Syzkaller bugs and code cleanups motivated by them"

* tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (199 commits)
  IB/rxe: Fix for oops in rxe_register_device on ppc64le arch
  IB/mlx5: Device memory mr registration support
  net/mlx5: Mkey creation command adjustments
  IB/mlx5: Device memory support in mlx5_ib
  net/mlx5: Query device memory capabilities
  IB/uverbs: Add device memory registration ioctl support
  IB/uverbs: Add alloc/free dm uverbs ioctl support
  IB/uverbs: Add device memory capabilities reporting
  IB/uverbs: Expose device memory capabilities to user
  RDMA/qedr: Fix wmb usage in qedr
  IB/rxe: Removed GID add/del dummy routines
  RDMA/qedr: Zero stack memory before copying to user space
  IB/mlx5: Add ability to hash by IPSEC_SPI when creating a TIR
  IB/mlx5: Add information for querying IPsec capabilities
  IB/mlx5: Add IPsec support for egress and ingress
  {net,IB}/mlx5: Add ipsec helper
  IB/mlx5: Add modify_flow_action_esp verb
  IB/mlx5: Add implementation for create and destroy action_xfrm
  IB/uverbs: Introduce ESP steering match filter
  IB/uverbs: Add modify ESP flow_action
  ...
Diffstat (limited to 'drivers/infiniband/ulp/srp')
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 181
1 files changed, 84 insertions, 97 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index b48843833d69..c35d2cd37d70 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -327,29 +327,10 @@ static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
return 0;
}
-static const char *inet_ntop(const void *sa, char *dst, unsigned int size)
-{
- switch (((struct sockaddr *)sa)->sa_family) {
- case AF_INET:
- snprintf(dst, size, "%pI4",
- &((struct sockaddr_in *)sa)->sin_addr);
- break;
- case AF_INET6:
- snprintf(dst, size, "%pI6",
- &((struct sockaddr_in6 *)sa)->sin6_addr);
- break;
- default:
- snprintf(dst, size, "???");
- break;
- }
- return dst;
-}
-
static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
struct rdma_cm_id *new_cm_id;
- char src_addr[64], dst_addr[64];
int ret;
new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
@@ -366,13 +347,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
(struct sockaddr *)&target->rdma_cm.dst,
SRP_PATH_REC_TIMEOUT_MS);
if (ret) {
- pr_err("No route available from %s to %s (%d)\n",
- target->rdma_cm.src_specified ?
- inet_ntop(&target->rdma_cm.src, src_addr,
- sizeof(src_addr)) : "(any)",
- inet_ntop(&target->rdma_cm.dst, dst_addr,
- sizeof(dst_addr)),
- ret);
+ pr_err("No route available from %pIS to %pIS (%d)\n",
+ &target->rdma_cm.src, &target->rdma_cm.dst, ret);
goto out;
}
ret = wait_for_completion_interruptible(&ch->done);
@@ -381,10 +357,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
ret = ch->status;
if (ret) {
- pr_err("Resolving address %s failed (%d)\n",
- inet_ntop(&target->rdma_cm.dst, dst_addr,
- sizeof(dst_addr)),
- ret);
+ pr_err("Resolving address %pIS failed (%d)\n",
+ &target->rdma_cm.dst, ret);
goto out;
}
@@ -457,6 +431,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct srp_fr_desc *d;
struct ib_mr *mr;
int i, ret = -EINVAL;
+ enum ib_mr_type mr_type;
if (pool_size <= 0)
goto err;
@@ -470,9 +445,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
+ if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ mr_type = IB_MR_TYPE_SG_GAPS;
+ else
+ mr_type = IB_MR_TYPE_MEM_REG;
+
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
- max_page_list_len);
+ mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
if (ret == -ENOMEM)
@@ -765,19 +744,12 @@ static void srp_path_rec_completion(int status,
static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
- int ret = -ENODEV;
+ int ret;
ch->ib_cm.path.numb_path = 1;
init_completion(&ch->done);
- /*
- * Avoid that the SCSI host can be removed by srp_remove_target()
- * before srp_path_rec_completion() is called.
- */
- if (!scsi_host_get(target->scsi_host))
- goto out;
-
ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
target->srp_host->srp_dev->dev,
target->srp_host->port,
@@ -791,27 +763,21 @@ static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
GFP_KERNEL,
srp_path_rec_completion,
ch, &ch->ib_cm.path_query);
- ret = ch->ib_cm.path_query_id;
- if (ret < 0)
- goto put;
+ if (ch->ib_cm.path_query_id < 0)
+ return ch->ib_cm.path_query_id;
ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0)
- goto put;
+ return ret;
- ret = ch->status;
- if (ret < 0)
+ if (ch->status < 0)
shost_printk(KERN_WARNING, target->scsi_host,
PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
be16_to_cpu(target->ib_cm.pkey),
be64_to_cpu(target->ib_cm.service_id));
-put:
- scsi_host_put(target->scsi_host);
-
-out:
- return ret;
+ return ch->status;
}
static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
@@ -2974,9 +2940,11 @@ static int srp_abort(struct scsi_cmnd *scmnd)
ret = FAST_IO_FAIL;
else
ret = FAILED;
- srp_free_req(ch, req, scmnd, 0);
- scmnd->result = DID_ABORT << 16;
- scmnd->scsi_done(scmnd);
+ if (ret == SUCCESS) {
+ srp_free_req(ch, req, scmnd, 0);
+ scmnd->result = DID_ABORT << 16;
+ scmnd->scsi_done(scmnd);
+ }
return ret;
}
@@ -3033,8 +3001,9 @@ static int srp_slave_alloc(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
- if (true)
+ if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
@@ -3365,9 +3334,6 @@ static bool srp_conn_unique(struct srp_host *host,
if (t != target &&
target->id_ext == t->id_ext &&
target->ioc_guid == t->ioc_guid &&
- (!target->using_rdma_cm ||
- memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst,
- sizeof(target->rdma_cm.dst)) == 0) &&
target->initiator_ext == t->initiator_ext) {
ret = false;
break;
@@ -3445,18 +3411,37 @@ static const match_table_t srp_opt_tokens = {
{ SRP_OPT_ERR, NULL }
};
+/**
+ * srp_parse_in - parse an IP address and port number combination
+ *
+ * Parse the following address formats:
+ * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
+ * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
+ */
static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
const char *addr_port_str)
{
- char *addr = kstrdup(addr_port_str, GFP_KERNEL);
- char *port_str = addr;
+ char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
+ char *port_str;
int ret;
if (!addr)
return -ENOMEM;
- strsep(&port_str, ":");
- ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa);
+ port_str = strrchr(addr, ':');
+ if (!port_str)
+ return -EINVAL;
+ *port_str++ = '\0';
+ ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
+ if (ret && addr[0]) {
+ addr_end = addr + strlen(addr) - 1;
+ if (addr[0] == '[' && *addr_end == ']') {
+ *addr_end = '\0';
+ ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
+ port_str, sa);
+ }
+ }
kfree(addr);
+ pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
return ret;
}
@@ -3789,14 +3774,11 @@ static ssize_t srp_create_target(struct device *dev,
if (!srp_conn_unique(target->srp_host, target)) {
if (target->using_rdma_cm) {
- char dst_addr[64];
-
shost_printk(KERN_INFO, target->scsi_host,
- PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n",
+ PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
be64_to_cpu(target->id_ext),
be64_to_cpu(target->ioc_guid),
- inet_ntop(&target->rdma_cm.dst, dst_addr,
- sizeof(dst_addr)));
+ &target->rdma_cm.dst);
} else {
shost_printk(KERN_INFO, target->scsi_host,
PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
@@ -3815,26 +3797,36 @@ static ssize_t srp_create_target(struct device *dev,
}
if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
- /*
- * FR and FMR can only map one HCA page per entry. If the
- * start address is not aligned on a HCA page boundary two
- * entries will be used for the head and the tail although
- * these two entries combined contain at most one HCA page of
- * data. Hence the "+ 1" in the calculation below.
- *
- * The indirect data buffer descriptor is contiguous so the
- * memory for that buffer will only be registered if
- * register_always is true. Hence add one to mr_per_cmd if
- * register_always has been set.
- */
+ bool gaps_reg = (ibdev->attrs.device_cap_flags &
+ IB_DEVICE_SG_GAPS_REG);
+
max_sectors_per_mr = srp_dev->max_pages_per_mr <<
(ilog2(srp_dev->mr_page_size) - 9);
- mr_per_cmd = register_always +
- (target->scsi_host->max_sectors + 1 +
- max_sectors_per_mr - 1) / max_sectors_per_mr;
+ if (!gaps_reg) {
+ /*
+ * FR and FMR can only map one HCA page per entry. If
+ * the start address is not aligned on a HCA page
+ * boundary two entries will be used for the head and
+ * the tail although these two entries combined
+ * contain at most one HCA page of data. Hence the "+
+ * 1" in the calculation below.
+ *
+ * The indirect data buffer descriptor is contiguous
+ * so the memory for that buffer will only be
+ * registered if register_always is true. Hence add
+ * one to mr_per_cmd if register_always has been set.
+ */
+ mr_per_cmd = register_always +
+ (target->scsi_host->max_sectors + 1 +
+ max_sectors_per_mr - 1) / max_sectors_per_mr;
+ } else {
+ mr_per_cmd = register_always +
+ (target->sg_tablesize +
+ srp_dev->max_pages_per_mr - 1) /
+ srp_dev->max_pages_per_mr;
+ }
pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
- target->scsi_host->max_sectors,
- srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+ target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
max_sectors_per_mr, mr_per_cmd);
}
@@ -3871,12 +3863,10 @@ static ssize_t srp_create_target(struct device *dev,
num_online_nodes());
const int ch_end = ((node_idx + 1) * target->ch_count /
num_online_nodes());
- const int cv_start = (node_idx * ibdev->num_comp_vectors /
- num_online_nodes() + target->comp_vector)
- % ibdev->num_comp_vectors;
- const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
- num_online_nodes() + target->comp_vector)
- % ibdev->num_comp_vectors;
+ const int cv_start = node_idx * ibdev->num_comp_vectors /
+ num_online_nodes();
+ const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
+ num_online_nodes();
int cpu_idx = 0;
for_each_online_cpu(cpu) {
@@ -3907,8 +3897,8 @@ static ssize_t srp_create_target(struct device *dev,
char dst[64];
if (target->using_rdma_cm)
- inet_ntop(&target->rdma_cm.dst, dst,
- sizeof(dst));
+ snprintf(dst, sizeof(dst), "%pIS",
+ &target->rdma_cm.dst);
else
snprintf(dst, sizeof(dst), "%pI6",
target->ib_cm.orig_dgid.raw);
@@ -3941,14 +3931,11 @@ connected:
if (target->state != SRP_TARGET_REMOVED) {
if (target->using_rdma_cm) {
- char dst[64];
-
- inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst));
shost_printk(KERN_DEBUG, target->scsi_host, PFX
- "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n",
+ "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
be64_to_cpu(target->id_ext),
be64_to_cpu(target->ioc_guid),
- target->sgid.raw, dst);
+ target->sgid.raw, &target->rdma_cm.dst);
} else {
shost_printk(KERN_DEBUG, target->scsi_host, PFX
"new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",