From e7d80c830489f67b1d0257e6919840100085dea9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 22 Jun 2017 15:01:10 -0400 Subject: IB/iser: Handle lack of memory management extentions correctly max_fast_reg_page_list_len is only valid when the memory management extensions are signaled by the underlying driver. Fix by adjusting iser_calc_scsi_params() to use ISCSI_ISER_MAX_SG_TABLESIZE when the extensions are not indicated. Reported-by: Thomas Rosenstein Fixes: Commit df749cdc45d9 ("IB/iser: Support up to 8MB data transfer in a single command") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Acked-by: Sagi Grimberg Tested-by: Thomas Rosenstein Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iser_verbs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index c538a38c91ce..26a004e97ae0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -708,8 +708,14 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, unsigned short sg_tablesize, sup_sg_tablesize; sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); - sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, - device->ib_device->attrs.max_fast_reg_page_list_len); + if (device->ib_device->attrs.device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS) + sup_sg_tablesize = + min_t( + uint, ISCSI_ISER_MAX_SG_TABLESIZE, + device->ib_device->attrs.max_fast_reg_page_list_len); + else + sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE; iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize); } -- cgit From 91647f4c2d66e16b30524613410a638c2c4532bf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:14 -0700 Subject: IB/hfi1: Ensure dd->gi_mask can not be overflowed As the code stands today the array access in remap_intr() is OK. 
To future proof the code though we should explicitly check to ensure the index value is not outside of the valid range. This is not a straight forward calculation so err on the side of caution. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2ba00b89df6a..94b54850ec75 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12847,7 +12847,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; - dd->gi_mask[m] &= ~((u64)1 << n); + if (likely(m < CCE_NUM_INT_CSRS)) { + dd->gi_mask[m] &= ~((u64)1 << n); + } else { + dd_dev_err(dd, "remap interrupt err\n"); + return; + } /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; -- cgit From 99975cd4fda52974a767aa44fe0b1a8f74950d9d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 24 Apr 2017 15:15:28 -0700 Subject: mlx5: Avoid that mlx5_ib_sg_to_klms() overflows the klms[] array ib_map_mr_sg() can pass an SG-list to .map_mr_sg() that is larger than what fits into a single MR. .map_mr_sg() must not attempt to map more SG-list elements than what fits into a single MR. Hence make sure that mlx5_ib_sg_to_klms() does not write outside the MR klms[] array. 
Fixes: b005d3164713 ("mlx5: Add arbitrary sg list support") Signed-off-by: Bart Van Assche Reviewed-by: Max Gurtovoy Cc: Sagi Grimberg Cc: Leon Romanovsky Cc: Israel Rukshin Cc: Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 763bb5b36144..2046a6987453 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1779,7 +1779,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { - if (unlikely(i > mr->max_descs)) + if (unlikely(i >= mr->max_descs)) break; klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); -- cgit From 28b5b3a23ba67970f4f534b15c4e4d687136605a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 4 May 2017 20:38:20 -0500 Subject: RDMA/core: Document confusing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking into Coverity ID 1351047 I ran into the following piece of code at drivers/infiniband/core/verbs.c:496: ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,                                    ah_attr->dmac,                                    wc->wc_flags & IB_WC_WITH_VLAN ?                                    NULL : &vlan_id,                                    &if_index, &hoplimit); The issue here is that the position of arguments in the call to rdma_addr_find_l2_eth_by_grh() function do not match the order of the parameters: &dgid is passed to sgid &sgid is passed to dgid This is the function prototype: int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,  const union ib_gid *dgid,  u8 *dmac, u16 *vlan_id, int *if_index,  int *hoplimit) My question here is if this is intentional? Answer: Yes. 
ib_init_ah_from_wc() creates ah from the incoming packet. Incoming packet has dgid of the receiver node on which this code is getting executed and sgid contains the GID of the sender. When resolving mac address of destination, you use arrived dgid as sgid and use sgid as dgid because sgid contains the destination's GID to respond to. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c973a83c898b..47ee1f83c9a9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -452,6 +452,19 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, } EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); +/* + * This function creates ah from the incoming packet. + * Incoming packet has dgid of the receiver node on which this code is + * getting executed and, sgid contains the GID of the sender. + * + * When resolving mac address of destination, the arrived dgid is used + * as sgid and, sgid is used as dgid because sgid contains destinations + * GID whom to respond to. + * + * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the + * position of arguments dgid and sgid do not match the order of the + * parameters. + */ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) -- cgit From c8c16d3bae967f1c7af541e8d016e5c51e4f010a Mon Sep 17 00:00:00 2001 From: Vladimir Neyelov Date: Sun, 21 May 2017 19:17:31 +0300 Subject: IB/iser: Fix connection teardown race condition Under heavy iser target (scst) start/stop stress during login/logout, the call trace provided below was observed on the iser initiator side. 
The function iscsi_iser_slave_alloc iser_conn pointer could be NULL, due to the fact that function iscsi_iser_conn_stop can be called before and free iser connection. Let's protect that flow by introducing global mutex. BUG: unable to handle kernel paging request at 0000000000001018 IP: [] iscsi_iser_slave_alloc+0x1e/0x50 [ib_iser] Call Trace: ? scsi_alloc_sdev+0x242/0x300 scsi_probe_and_add_lun+0x9e1/0xea0 ? kfree_const+0x21/0x30 ? kobject_set_name_vargs+0x76/0x90 ? __pm_runtime_resume+0x5b/0x70 __scsi_scan_target+0xf6/0x250 scsi_scan_target+0xea/0x100 iscsi_user_scan_session.part.13+0x101/0x130 [scsi_transport_iscsi] ? iscsi_user_scan_session.part.13+0x130/0x130 [scsi_transport_iscsi] iscsi_user_scan_session+0x1e/0x30 [scsi_transport_iscsi] device_for_each_child+0x50/0x90 iscsi_user_scan+0x44/0x60 [scsi_transport_iscsi] store_scan+0xa8/0x100 ? common_file_perm+0x5d/0x1c0 dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x12c/0x1c0 __vfs_write+0x18/0x40 vfs_write+0xb5/0x1a0 SyS_write+0x55/0xc0 Fixes: 318d311e8f01 ("iser: Accept arbitrary sg lists mapping if the device supports it") Cc: # v4.5+ Signed-off-by: Vladimir Neyelov Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 5a887efb4bdf..37b33d708c2d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; static struct scsi_transport_template *iscsi_iser_scsi_transport; static struct workqueue_struct *release_wq; +static DEFINE_MUTEX(unbind_iser_conn_mutex); struct iser_global ig; int iser_debug_level = 0; @@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn 
*cls_conn, int flag) */ if (iser_conn) { mutex_lock(&iser_conn->state_mutex); + mutex_lock(&unbind_iser_conn_mutex); iser_conn_terminate(iser_conn); iscsi_conn_stop(cls_conn, flag); /* unbind */ iser_conn->iscsi_conn = NULL; conn->dd_data = NULL; + mutex_unlock(&unbind_iser_conn_mutex); complete(&iser_conn->stop_completion); mutex_unlock(&iser_conn->state_mutex); @@ -977,13 +980,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev) struct iser_conn *iser_conn; struct ib_device *ib_dev; + mutex_lock(&unbind_iser_conn_mutex); + session = starget_to_session(scsi_target(sdev))->dd_data; iser_conn = session->leadconn->dd_data; + if (!iser_conn) { + mutex_unlock(&unbind_iser_conn_mutex); + return -ENOTCONN; + } ib_dev = iser_conn->ib_conn.device->ib_device; if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + mutex_unlock(&unbind_iser_conn_mutex); + return 0; } -- cgit From bebb2a473a43c8f84a8210687d1cbdde503046d7 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 23 May 2017 10:48:44 +0300 Subject: IB/core: Namespace is mandatory input for address resolution In function addr_resolve() the namespace is a required input parameter and not an output. It is passed later for searching the routing table and device addresses. Also, it shouldn't be copied back to the caller. 
Fixes: 565edd1d5555 ('IB/addr: Pass network namespace as a parameter') Cc: # v4.3+ Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a6cb379a4ebc..d78bc74bc9a9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -509,6 +509,11 @@ static int addr_resolve(struct sockaddr *src_in, struct dst_entry *dst; int ret; + if (!addr->net) { + pr_warn_ratelimited("%s: missing namespace\n", __func__); + return -EINVAL; + } + if (src_in->sa_family == AF_INET) { struct rtable *rt = NULL; const struct sockaddr_in *dst_in4 = @@ -546,7 +551,6 @@ static int addr_resolve(struct sockaddr *src_in, } addr->bound_dev_if = ndev->ifindex; - addr->net = dev_net(ndev); dev_put(ndev); return ret; -- cgit From cbd09aebc2d62095b05797af5c9a315e3a71dcea Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 23 May 2017 10:48:45 +0300 Subject: IB/core: Don't resolve IP address to the loopback device When resolving an IP address that is on the host of the caller the result from querying the routing table is the loopback device. This is not a valid response, because it doesn't represent the RDMA device and the port. Therefore, callers need to check the resolved device and if it is a loopback device find an alternative way to resolve it. To avoid this we make sure that the response from rdma_resolve_ip() will not be the loopback device. While at it, we fix a static checker warning about dereferencing an uninitialized pointer, using the solution from commit abeffce90c7f ("net/mlx5e: Fix a -Wmaybe-uninitialized warning") as a reference. 
Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 40 +++++++++++++++++++++++++++++----------- drivers/infiniband/core/cma.c | 32 +++----------------------------- drivers/infiniband/core/verbs.c | 5 ----- 3 files changed, 32 insertions(+), 45 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d78bc74bc9a9..01236cef7bfb 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -268,6 +268,7 @@ int rdma_translate_ip(const struct sockaddr *addr, return ret; ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); @@ -280,6 +281,7 @@ int rdma_translate_ip(const struct sockaddr *addr, &((const struct sockaddr_in6 *)addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); break; @@ -405,10 +407,10 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.saddr = src_ip; fl4.flowi4_oif = addr->bound_dev_if; rt = ip_route_output_key(addr->net, &fl4); - if (IS_ERR(rt)) { - ret = PTR_ERR(rt); - goto out; - } + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; + src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; @@ -423,8 +425,6 @@ static int addr4_resolve(struct sockaddr_in *src_in, *prt = rt; return 0; -out: - return ret; } #if IS_ENABLED(CONFIG_IPV6) @@ -527,8 +527,12 @@ static int addr_resolve(struct sockaddr *src_in, if (resolve_neigh) ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); - ndev = rt->dst.dev; - dev_hold(ndev); + if (addr->bound_dev_if) { + ndev = dev_get_by_index(addr->net, addr->bound_dev_if); + } else { + ndev = rt->dst.dev; + dev_hold(ndev); + } ip_rt_put(rt); } else { @@ -544,13 +548,27 @@ static int addr_resolve(struct sockaddr *src_in, if (resolve_neigh) 
ret = addr_resolve_neigh(dst, dst_in, addr, seq); - ndev = dst->dev; - dev_hold(ndev); + if (addr->bound_dev_if) { + ndev = dev_get_by_index(addr->net, addr->bound_dev_if); + } else { + ndev = dst->dev; + dev_hold(ndev); + } dst_release(dst); } - addr->bound_dev_if = ndev->ifindex; + if (ndev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip(dst_in, addr, NULL); + /* + * Put the loopback device and get the translated + * device instead. + */ + dev_put(ndev); + ndev = dev_get_by_index(addr->net, addr->bound_dev_if); + } else { + addr->bound_dev_if = ndev->ifindex; + } dev_put(ndev); return ret; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 31bb82d8ecd7..11aff923b633 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -623,22 +623,11 @@ static inline int cma_validate_port(struct ib_device *device, u8 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; - if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) ndev = dev_get_by_index(&init_net, bound_if_index); - if (ndev && ndev->flags & IFF_LOOPBACK) { - pr_info("detected loopback device\n"); - dev_put(ndev); - - if (!device->get_netdev) - return -EOPNOTSUPP; - - ndev = device->get_netdev(device, port); - if (!ndev) - return -ENODEV; - } - } else { + else gid_type = IB_GID_TYPE_IB; - } + ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, ndev, NULL); @@ -2569,21 +2558,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } - if (ndev->flags & IFF_LOOPBACK) { - dev_put(ndev); - if (!id_priv->id.device->get_netdev) { - ret = -EOPNOTSUPP; - goto err2; - } - - ndev = id_priv->id.device->get_netdev(id_priv->id.device, - id_priv->id.port_num); - if (!ndev) { - ret = -ENODEV; - goto err2; - } - } - supported_gids = roce_gid_type_mask_support(id_priv->id.device, id_priv->id.port_num); gid_type = 
cma_route_gid_type(addr->dev_addr.network, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 47ee1f83c9a9..644fa0d13f02 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -520,11 +520,6 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, } resolved_dev = dev_get_by_index(&init_net, if_index); - if (resolved_dev->flags & IFF_LOOPBACK) { - dev_put(resolved_dev); - resolved_dev = idev; - dev_hold(resolved_dev); - } rcu_read_lock(); if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev, resolved_dev)) -- cgit From a512c2fbef9c700ee1ee0e045b75e140fef8f5ee Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 May 2017 11:26:08 +0300 Subject: IB/core: Introduce modify QP operation with udata This patch adds new function ib_modify_qp_with_udata so that uverbs layer can avoid handling L2 mac address at verbs layer and depend on the core layer to resolve the mac address consistently for all required QPs. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 644fa0d13f02..7f8fe443df46 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1276,20 +1276,36 @@ out: } EXPORT_SYMBOL(ib_resolve_eth_dmac); -int ib_modify_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask) +/** + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. + * @qp: The QP to modify. + * @attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. 
+ * @udata: pointer to user's input output buffer information + * are being modified. + * It returns 0 on success and returns appropriate error code on error. + */ +int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) { + int ret; - if (qp_attr_mask & IB_QP_AV) { - int ret; - - ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); + if (attr_mask & IB_QP_AV) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); if (ret) return ret; } + return ib_security_modify_qp(qp, attr, attr_mask, udata); +} +EXPORT_SYMBOL(ib_modify_qp_with_udata); - return ib_security_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask) +{ + return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); -- cgit From f7c8f2e9ddc71db0ae344f3ffb19df03ef32b719 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 May 2017 11:26:09 +0300 Subject: IB/uverbs: Make use of ib_modify_qp variant to avoid resolving DMAC This patch makes use of IB core's ib_modify_qp_with_udata function that also resolves the DMAC and handles udata. 
Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8ba9bfb073d1..3f55d18a3791 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2005,28 +2005,13 @@ static int modify_qp(struct ib_uverbs_file *file, rdma_ah_set_port_num(&attr->alt_ah_attr, cmd->base.alt_dest.port_num); - if (qp->real_qp == qp) { - if (cmd->base.attr_mask & IB_QP_AV) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto release_qp; - } - ret = ib_security_modify_qp(qp, - attr, - modify_qp_mask(qp->qp_type, - cmd->base.attr_mask), - udata); - } else { - ret = ib_security_modify_qp(qp, - attr, - modify_qp_mask(qp->qp_type, - cmd->base.attr_mask), - NULL); - } + ret = ib_modify_qp_with_udata(qp, attr, + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask), + udata); release_qp: uobj_put_obj_read(qp); - out: kfree(attr); -- cgit From 98e77d9fd7dff05019436370e78c3ec0f9894e25 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 11:29:42 +0300 Subject: IB: Convert msleep below 20ms to usleep_range msleep(1) may not sleep for 1 ms as expected and may sleep longer. A simple conversion from msleep to usleep_range between 1 ms and 2 ms solves this issue. The full and comprehensive explanation can be found at [1] and [2]. 
[1] https://lkml.org/lkml/2007/8/3/250 [2] Documentation/timers/timers-howto.txt Signed-off-by: Leon Romanovsky Reviewed-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/mcg.c | 2 +- drivers/infiniband/hw/nes/nes_hw.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 37d5d29597a4..729f8cc8738b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -995,7 +995,8 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) goto free_work; } ne -= ret; - msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000, + (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000); } while (ne && time_before_eq(jiffies, end)); if (ne != 0) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 75b2f7d4cd95..d1b43cbbfea7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1155,7 +1155,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * call to mlx4_ib_vma_close. 
*/ put_task_struct(owning_process); - msleep(1); + usleep_range(1000, 2000); owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); if (!owning_process || diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 3405e947dc1e..b73f89700ef9 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy if (!count) break; - msleep(1); + usleep_range(1000, 2000); } while (time_after(end, jiffies)); flush_workqueue(ctx->mcg_wq); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8f9d8b4ad583..b0adf65e4bdb 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -551,7 +551,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) int_cnt++; - msleep(1); + usleep_range(1000, 2000); } if (int_cnt > 1) { spin_lock_irqsave(&nesadapter->phy_lock, flags); @@ -592,7 +592,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { break; } } - msleep(1); + usleep_range(1000, 2000); } } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7cbcfdac6529..d574d41bdf61 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -954,7 +954,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) break; } spin_unlock_irq(&priv->lock); - msleep(1); + usleep_range(1000, 2000); ipoib_drain_cq(dev); spin_lock_irq(&priv->lock); } @@ -1206,7 +1206,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) goto timeout; } - msleep(1); + usleep_range(1000, 2000); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index efe7402f4885..57a9655e844d 100644 --- 
a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -770,7 +770,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_drain_cq(dev); - msleep(1); + usleep_range(1000, 2000); } ipoib_dbg(priv, "All sends and receives done.\n"); -- cgit From ed7b521d8a98c3371e3c9300df8bf3cb774d8ea6 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 23 May 2017 11:42:52 +0300 Subject: IB/IPoIB: Forward MTU change to driver below This patch checks if there is a driver below that needs to be updated on the new MTU and calls it accordingly. Signed-off-by: Erez Shitrit Reviewed by: Alex Vesker Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6e86eeee370e..3e2b7988ead8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -233,6 +233,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + int ret = 0; /* dev->mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(dev)) { @@ -256,9 +257,23 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_dbg(priv, "MTU must be smaller than the underlying " "link layer MTU - 4 (%u)\n", priv->mcast_mtu); - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + new_mtu = min(priv->mcast_mtu, priv->admin_mtu); - return 0; + if (priv->rn_ops->ndo_change_mtu) { + bool carrier_status = netif_carrier_ok(dev); + + netif_carrier_off(dev); + + /* notify lower level on the real mtu */ + ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu); + + if (carrier_status) + netif_carrier_on(dev); + } else { + dev->mtu = 
new_mtu; + } + + return ret; } /* Called with an RCU read lock taken */ -- cgit From d83187dda9b930dc268ab05da265f3d5d7eca451 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:13 +0300 Subject: IB/IPoIB: Convert IPoIB to memalloc_noio_* calls Commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O during memory allocation") added the memalloc_noio_(save|restore) functions to enable people to modify the MM behavior by disabling I/O during memory allocation. This was further extended in Fixes: 934f3072c17c ("mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set"). memalloc_noio_* functions prevent allocation paths recursing back into the filesystem without explicitly changing the flags for every allocation site. However the IPoIB hasn't been keeping up with the changes and missed completely these memalloc_noio_* calls. This led to update of allocation site with special QP creation flag, see commit 09b93088d750 ("IB: Add a QP creation flag to use GFP_NOIO allocations"), while this flag is supported by small number of drivers in IB stack. Let's change it by updating to memalloc_noio_* calls and allow for every driver underneath enjoy NOIO allocations. 
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d574d41bdf61..f87d104837dc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ipoib.h" @@ -1047,9 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, .qp_context = tx, - .create_flags = IB_QP_CREATE_USE_GFP_NOIO + .create_flags = 0 }; - struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) @@ -1057,10 +1057,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); - if (PTR_ERR(tx_qp) == -EINVAL) { - attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; - tx_qp = ib_create_qp(priv->pd, &attr); - } tx->max_send_sge = attr.cap.max_send_sge; return tx_qp; } @@ -1131,10 +1127,11 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct sa_path_rec *pathrec) { struct ipoib_dev_priv *priv = ipoib_priv(p->dev); + unsigned int noio_flag; int ret; - p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, - GFP_NOIO, PAGE_KERNEL); + noio_flag = memalloc_noio_save(); + p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring)); if (!p->tx_ring) { ret = -ENOMEM; goto err_tx; @@ -1142,9 +1139,10 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); + memalloc_noio_restore(noio_flag); if (IS_ERR(p->qp)) { ret = PTR_ERR(p->qp); - ipoib_warn(priv, "failed to allocate 
tx qp: %d\n", ret); + ipoib_warn(priv, "failed to create tx qp: %d\n", ret); goto err_qp; } -- cgit From 0f4d027c3b4240ecb314daa948238d459fdc3a00 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:14 +0300 Subject: IB/{rdmavt, qib, hfi1}: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 7 +++-- drivers/infiniband/hw/hfi1/qp.h | 3 +-- drivers/infiniband/hw/qib/qib_qp.c | 15 +++++------ drivers/infiniband/hw/qib/qib_verbs.h | 4 +-- drivers/infiniband/sw/rdmavt/qp.c | 48 ++++++++++------------------------- 5 files changed, 26 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305cc0373..1a7af9f60c13 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -647,18 +647,17 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->pid); } -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp) +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct hfi1_qp_priv *priv; - priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node); + priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp, + priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL, rdi->dparms.node); if (!priv->s_ahg) { kfree(priv); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index 1eb9cd7b8c19..6fe542b6a927 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ 
b/drivers/infiniband/hw/hfi1/qp.h @@ -123,8 +123,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp); /* * Functions provided by hfi1 driver for rdmavt to use */ -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp); +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); unsigned free_all_qps(struct rvt_dev_info *rdi); void notify_qp_reset(struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 5984981e7dd4..a343e3b5d4cb 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -104,10 +104,9 @@ const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = { }; -static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, - gfp_t gfp) +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { - unsigned long page = get_zeroed_page(gfp); + unsigned long page = get_zeroed_page(GFP_KERNEL); /* * Free the page if someone raced with us installing it. @@ -126,7 +125,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
*/ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) + enum ib_qp_type type, u8 port) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; @@ -160,7 +159,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map, gfp); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -317,16 +316,16 @@ u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) return ib_mtu_enum_to_int(pmtu); } -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct qib_qp_priv *priv; - priv = kzalloc(sizeof(*priv), gfp); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL); if (!priv->s_hdr) { kfree(priv); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index da0db5485ddc..a52fc67b40d7 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -274,11 +274,11 @@ int qib_get_counters(struct qib_pportdata *ppd, * Functions provided by qib driver for rdmavt to use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi); -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qib_notify_qp_reset(struct rvt_qp *qp); int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); + enum ib_qp_type type, u8 port); void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait); #ifdef CONFIG_DEBUG_FS diff --git 
a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 727e81cc2c8f..459865439a0b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -118,10 +118,9 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { EXPORT_SYMBOL(ib_rvt_state_ops); static void get_map_page(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, - gfp_t gfp) + struct rvt_qpn_map *map) { - unsigned long page = get_zeroed_page(gfp); + unsigned long page = get_zeroed_page(GFP_KERNEL); /* * Free the page if someone raced with us installing it. @@ -173,7 +172,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { - get_map_page(qpt, map, GFP_KERNEL); + get_map_page(qpt, map); if (!map->page) { ret = -ENOMEM; break; @@ -342,14 +341,14 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num, gfp_t gfp) + enum ib_qp_type type, u8 port_num) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -374,7 +373,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map, gfp); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -672,7 +671,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; - gfp_t gfp; size_t sqsize; if (!rdi) @@ -680,18 +678,9 @@ struct ib_qp 
*rvt_create_qp(struct ib_pd *ibpd, if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + init_attr->create_flags) return ERR_PTR(-EINVAL); - /* GFP_NOIO is applicable to RC QP's only */ - - if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && - init_attr->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - - gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? - GFP_NOIO : GFP_KERNEL; - /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || @@ -719,14 +708,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); - if (gfp == GFP_NOIO) - swq = __vmalloc( - sqsize * sz, - gfp | __GFP_ZERO, PAGE_KERNEL); - else - swq = vzalloc_node( - sqsize * sz, - rdi->dparms.node); + swq = vzalloc_node(sqsize * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -741,7 +723,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); + qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL, + rdi->dparms.node); if (!qp) goto bail_swq; @@ -751,7 +734,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, kzalloc_node( sizeof(*qp->s_ack_queue) * rvt_max_atomic(rdi), - gfp, + GFP_KERNEL, rdi->dparms.node); if (!qp->s_ack_queue) goto bail_qp; @@ -766,7 +749,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * Driver needs to set up it's private QP structure and do any * initialization that is needed. 
*/ - priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); + priv = rdi->driver_f.qp_priv_alloc(rdi, qp); if (IS_ERR(priv)) { ret = priv; goto bail_qp; @@ -786,11 +769,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = vmalloc_user( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); - else if (gfp == GFP_NOIO) - qp->r_rq.wq = __vmalloc( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - gfp | __GFP_ZERO, PAGE_KERNEL); else qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + @@ -824,7 +802,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num, gfp); + init_attr->port_num); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; -- cgit From 8900b894e769dd88b53e519e3502e0e3c349fe95 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:15 +0300 Subject: {net, IB}/mlx4: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. 
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 6 +++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 - drivers/infiniband/hw/mlx4/qp.c | 40 +++++++++++++++--------------------- drivers/infiniband/hw/mlx4/srq.c | 8 ++++---- 4 files changed, 24 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 4f5a143fc0a7..ff931c580557 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); + PAGE_SIZE * 2, &buf->buf); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); if (err) goto err_mtt; @@ -219,7 +219,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); + err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) goto err_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c2b9cbf4da05..9db82e67e959 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -185,7 +185,6 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, - MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, diff --git 
a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 996e9058e515..75c0e6c5dd56 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -634,8 +634,8 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, - gfp_t gfp) + struct ib_udata *udata, int sqpn, + struct mlx4_ib_qp **caller_qp) { int qpn; int err; @@ -691,14 +691,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); + sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); + qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); if (!qp) return -ENOMEM; qp->pri.vid = 0xFFFF; @@ -780,7 +780,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); + err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -788,7 +788,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, - &qp->buf, gfp)) { + &qp->buf)) { memcpy(&init_attr->cap, &backup_cap, sizeof(backup_cap)); err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, @@ -797,7 +797,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_db; if (mlx4_buf_alloc(dev->dev, qp->buf_size, - PAGE_SIZE * 2, &qp->buf, gfp)) { + PAGE_SIZE * 2, &qp->buf)) { err = -ENOMEM; goto err_db; } @@ -808,20 +808,20 @@ static int 
create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); if (err) goto err_mtt; qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), - gfp | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!qp->sq.wrid) qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), - gfp, PAGE_KERNEL); + GFP_KERNEL, PAGE_KERNEL); qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), - gfp | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!qp->rq.wrid) qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), - gfp, PAGE_KERNEL); + GFP_KERNEL, PAGE_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -859,7 +859,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); if (err) goto err_qpn; @@ -1127,10 +1127,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; - gfp_t gfp; - gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? - GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. 
@@ -1140,8 +1137,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | - MLX4_IB_QP_CREATE_ROCE_V2_GSI | - MLX4_IB_QP_CREATE_USE_GFP_NOIO)) + MLX4_IB_QP_CREATE_ROCE_V2_GSI)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1154,7 +1150,6 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_CREATE_USE_GFP_NOIO | MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) && init_attr->qp_type != IB_QPT_UD) || @@ -1179,7 +1174,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, gfp); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; @@ -1188,7 +1183,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp, gfp); + udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1217,8 +1212,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - sqpn, - &qp, gfp); + sqpn, &qp); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index e32dd58937a8..0facaf5f6d23 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -135,14 +135,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); + err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, - GFP_KERNEL)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, + &srq->buf)) { err = -ENOMEM; goto err_db; } @@ -167,7 
+167,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); if (err) goto err_mtt; -- cgit From 12cc1a027341338f54d8d3fcf5d188ae2b39c30d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 May 2017 09:44:48 +0300 Subject: IB/mlx5: Clean mr_cache debugfs in case of failure The failure in creation of debugfs entries for mr_cache left entries, which were already created. It caused to mismatch and misguiding for the end users. The solution is to clean mr_cache debugfs root, so no leftovers will be in the system. In addition, let's document why the error is not needed to be forwarded to user in case of failure. Signed-off-by: Leon Romanovsky Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2046a6987453..8ab2f1360a45 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -582,6 +582,15 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } } +static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->cache.root); + dev->cache.root = NULL; +} + static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; @@ -600,38 +609,34 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) sprintf(ent->name, "%d", ent->order); ent->dir = debugfs_create_dir(ent->name, cache->root); if (!ent->dir) - return -ENOMEM; + goto err; ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, &size_fops); if (!ent->fsize) - return -ENOMEM; + goto err; ent->flimit = 
debugfs_create_file("limit", 0600, ent->dir, ent, &limit_fops); if (!ent->flimit) - return -ENOMEM; + goto err; ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, &ent->cur); if (!ent->fcur) - return -ENOMEM; + goto err; ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, &ent->miss); if (!ent->fmiss) - return -ENOMEM; + goto err; } return 0; -} - -static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!mlx5_debugfs_root) - return; +err: + mlx5_mr_cache_debugfs_cleanup(dev); - debugfs_remove_recursive(dev->cache.root); + return -ENOMEM; } static void delay_time_func(unsigned long ctx) @@ -692,6 +697,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) if (err) mlx5_ib_warn(dev, "cache debugfs failure\n"); + /* + * We don't want to fail driver if debugfs failed to initialize, + * so we are not forwarding error to the user. + */ + return 0; } -- cgit From 8fe8bacb92f249c91a1407b48aa1cb98067fe19d Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Tue, 30 May 2017 09:58:06 +0300 Subject: IB/core: Add ordered workqueue for RoCE GID management Currently the RoCE GID management uses the ib_wq to do add and delete new GIDs according to the netdev events. The ib_wq isn't an ordered workqueue and thus two work elements can be executed concurrently which will result in unexpected behavior and inconsistency of the GIDs cache content. Example: ifconfig eth1 11.11.11.11/16 up This command will invoke the following netdev events in the following order: 1. NETDEV_UP 2. NETDEV_DOWN 3. NETDEV_UP If (2) and (3) will be executed concurrently or in reverse order, instead of having a new GID with 11.11.11.11 IP, we will end up without any new GIDs. 
Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/roce_gid_mgmt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index db958d3207ef..94a9eefb3cfc 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -42,6 +42,8 @@ #include #include +static struct workqueue_struct *gid_cache_wq; + enum gid_op_type { GID_DEL = 0, GID_ADD @@ -560,7 +562,7 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, } INIT_WORK(&ndev_work->work, netdevice_event_work_handler); - queue_work(ib_wq, &ndev_work->work); + queue_work(gid_cache_wq, &ndev_work->work); return NOTIFY_DONE; } @@ -693,7 +695,7 @@ static int addr_event(struct notifier_block *this, unsigned long event, dev_hold(ndev); work->gid_attr.ndev = ndev; - queue_work(ib_wq, &work->work); + queue_work(gid_cache_wq, &work->work); return NOTIFY_DONE; } @@ -740,6 +742,10 @@ static struct notifier_block nb_inet6addr = { int __init roce_gid_mgmt_init(void) { + gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0); + if (!gid_cache_wq) + return -ENOMEM; + register_inetaddr_notifier(&nb_inetaddr); if (IS_ENABLED(CONFIG_IPV6)) register_inet6addr_notifier(&nb_inet6addr); @@ -764,4 +770,5 @@ void __exit roce_gid_mgmt_cleanup(void) * ib-core is removed, all physical devices have been removed, * so no issue with remaining hardware contexts. */ + destroy_workqueue(gid_cache_wq); } -- cgit From b6c871e5875798e5ed3744c725622dcd3c92be92 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Mon, 12 Jun 2017 10:45:21 +0300 Subject: IB/ipoib: Let lower driver handle get_stats64 call The driver checks if the lower level driver supports get_stats, and if so calls it to get the updated statistics, otherwise takes from the current netdevice stats object. 
Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e2b7988ead8..70dacaf9044e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -276,6 +276,17 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static void ipoib_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (priv->rn_ops->ndo_get_stats64) + priv->rn_ops->ndo_get_stats64(dev, stats); + else + netdev_stats_to_stats64(stats, &dev->stats); +} + /* Called with an RCU read lock taken */ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, struct net_device *dev) @@ -1823,6 +1834,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_get_vf_stats = ipoib_get_vf_stats, .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, + .ndo_get_stats64 = ipoib_get_stats, }; static const struct net_device_ops ipoib_netdev_ops_vf = { -- cgit From fda85ce912401750e1e80757627af2784c7cc5a7 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Thu, 22 Jun 2017 17:09:59 +0300 Subject: IB/rxe: Fix kernel panic from skb destructor In the time between rxe_send has finished and skb destructor called, the QP's ref count might be 0, leading to a possible QP destruction. This will lead to a kernel panic when the destructor dereferences the QP. The operation of incrementing QP ref count at rxe_send and decrementing from skb destructor will prevent this crash. 
BUG: unable to handle kernel NULL pointer dereference at 000000000000072c IP: [] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe] PGD 0 [16240.211178] Oops: 0002 [#1] SMP CPU: 3 PID: 0 Comm: swapper/3 Tainted: G OE 4.9.0-mlnx #1 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 task: ffff88042d6b1480 task.stack: ffffc90001904000 RIP: 0010:[] [] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe] RSP: 0018:ffff88043fcc3df0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880429684700 RCX: ffff88042d248200 RDX: 00000000ffffffff RSI: 00000000fffffe01 RDI: ffff880429684700 RBP: ffff88043fcc3e00 R08: ffff88043fcda240 R09: 00000000ff2d1de6 R10: 0000000000000000 R11: 00000000f49cf6fe R12: ffff880429684700 R13: ffffffff81893f96 R14: ffffffff817d66f0 R15: ffff880427f74200 FS: 0000000000000000(0000) GS:ffff88043fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000072c CR3: 000000041d3df000 CR4: 00000000000006e0 Stack: ffffffff817b29cf ffff880429684700 ffff88043fcc3e18 ffffffff817b42c2 ffff880429684700 ffff88043fcc3e40 ffffffff817b4332 ffff880429684700 ffff880427f74238 ffff880427f74228 ffff88043fcc3e58 ffffffff81893f96 Call Trace: [16240.336345] [] ? skb_release_head_state+0x4f/0xb0 [] skb_release_all+0x12/0x30 [] kfree_skb+0x32/0x90 [] ndisc_error_report+0x36/0x40 [] neigh_invalidate+0x81/0xf0 [] neigh_timer_handler+0x207/0x2b0 [] call_timer_fn+0x35/0x120 [] run_timer_softirq+0x1d7/0x460 [] ? kvm_sched_clock_read+0x1e/0x30 [] ? sched_clock+0x9/0x10 [] ? sched_clock_cpu+0x72/0xa0 [] __do_softirq+0xd7/0x289 [] irq_exit+0xb5/0xc0 [] smp_apic_timer_interrupt+0x42/0x50 [] apic_timer_interrupt+0x82/0x90 [16240.395776] [] ? 
native_safe_halt+0x6/0x10 [] default_idle+0x1e/0xd0 [] arch_cpu_idle+0xf/0x20 [] default_idle_call+0x35/0x40 [] cpu_startup_entry+0x185/0x210 [] start_secondary+0x103/0x130 RIP [] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe] Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Reviewed-by: Johannes Thumshirn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c3a140ed4df2..08f3f90d2912 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -441,6 +441,8 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) if (unlikely(qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_run_task(&qp->req.task, 1); + + rxe_drop_ref(qp); } int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) @@ -473,6 +475,7 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) return -EAGAIN; } + rxe_add_ref(pkt->qp); atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); -- cgit From 56012e1cada54460f9e456cd77276e765e06ce6c Mon Sep 17 00:00:00 2001 From: yonatanc Date: Thu, 22 Jun 2017 17:10:00 +0300 Subject: IB/rxe: Set dma_mask and coherent_dma_mask The RXE coupled with a dummy device causes the kernel panic attached below. The panic happens when ib_register_device tries to set dma_mask by accessing a NULLed parent device. The RXE does not actually use DMA, so we can set the dma_mask to the architecture value.
[16240.199689] RIP: 0010:ib_register_device+0x468/0x5a0 [ib_core] [16240.205289] RSP: 0018:ffffc9000220fc10 EFLAGS: 00010246 [16240.209909] RAX: 0000000000000024 RBX: ffff880220d1a2a8 RCX: 0000000000000000 [16240.212244] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 [16240.214385] RBP: ffffc9000220fcb0 R08: 0000000000000000 R09: 000000000000023f [16240.254465] R10: 0000000000000007 R11: 0000000000000000 R12: 0000000000000000 [16240.259467] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880220d1a2a8 [16240.263314] FS: 00007fd8ecca0740(0000) GS:ffff8802364c0000(0000) knlGS:0000000000000000 [16240.267292] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [16240.273503] CR2: 0000000000000218 CR3: 00000002253ba000 CR4: 00000000000006e0 [16240.277066] Call Trace: [16240.281836] ? __kmalloc+0x26f/0x280 [16240.286596] rxe_register_device+0x297/0x300 [rdma_rxe] [16240.291377] rxe_add+0x535/0x5b0 [rdma_rxe] [16240.297586] rxe_net_add+0x3e/0xc0 [rdma_rxe] [16240.302375] rxe_param_set_add+0x65/0x144 [rdma_rxe] [16240.307769] param_attr_store+0x68/0xd0 [16240.311640] module_attr_store+0x1d/0x30 [16240.316421] sysfs_kf_write+0x3a/0x50 [16240.317802] kernfs_fop_write+0xff/0x180 [16240.322989] __vfs_write+0x37/0x140 [16240.328164] ? 
handle_mm_fault+0xce/0x240 [16240.333340] vfs_write+0xb2/0x1b0 [16240.335013] SyS_write+0x55/0xc0 [16240.340632] entry_SYSCALL_64_fastpath+0x1a/0xa9 Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Reviewed-by: Johannes Thumshirn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 073e66783f1d..07511718d98d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1240,6 +1240,8 @@ int rxe_register_device(struct rxe_dev *rxe) addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; + dma_coerce_mask_and_coherent(&dev->dev, + dma_get_required_mask(dev->dev.parent)); dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) -- cgit From 5802883d4b7c544012a1857660f78af41f6c183a Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:21 +0800 Subject: IB/hns: Fix the bug of polling cq failed for loopback Qps In hip06 SoC, RoCE driver creates 8 reserved loopback QPs to ensure zero wqe when free mr. However, if the enabled phy port number is less than 6, it will fail in polling cqe with 8 reserved loopback QPs. In order to solve this problem, the number of loopback Qps will be adjusted based on the number of enabled phy port. 
Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 53 +++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 729f8cc8738b..c291b2a4c7cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -661,9 +661,11 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) union ib_gid dgid; u64 subnet_prefix; int attr_mask = 0; - int i; + int i, j; int ret; + u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 }; u8 phy_port; + u8 port = 0; u8 sl; priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; @@ -709,11 +711,27 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) attr.rnr_retry = 7; attr.timeout = 0x12; attr.path_mtu = IB_MTU_256; + attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0); rdma_ah_set_static_rate(&attr.ah_attr, 3); subnet_prefix = cpu_to_be64(0xfe80000000000000LL); for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) : + (i % HNS_ROCE_MAX_PORTS); + sl = i / HNS_ROCE_MAX_PORTS; + + for (j = 0; j < caps->num_ports; j++) { + if (hr_dev->iboe.phy_port[j] == phy_port) { + queue_en[i] = 1; + port = j; + break; + } + } + + if (!queue_en[i]) + continue; + free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); if (IS_ERR(free_mr->mr_free_qp[i])) { dev_err(dev, "Create loop qp failed!\n"); @@ -721,15 +739,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) } hr_qp = free_mr->mr_free_qp[i]; - sl = i / caps->num_ports; - - if (caps->num_ports == HNS_ROCE_MAX_PORTS) - phy_port = (i >= HNS_ROCE_MAX_PORTS) ? 
(i - 2) : - (i % caps->num_ports); - else - phy_port = i % caps->num_ports; - - hr_qp->port = phy_port + 1; + hr_qp->port = port; hr_qp->phy_port = phy_port; hr_qp->ibqp.qp_type = IB_QPT_RC; hr_qp->ibqp.device = &hr_dev->ib_dev; @@ -739,23 +749,22 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) hr_qp->ibqp.recv_cq = cq; hr_qp->ibqp.send_cq = cq; - rdma_ah_set_port_num(&attr.ah_attr, phy_port + 1); - rdma_ah_set_sl(&attr.ah_attr, phy_port + 1); - attr.port_num = phy_port + 1; + rdma_ah_set_port_num(&attr.ah_attr, port + 1); + rdma_ah_set_sl(&attr.ah_attr, sl); + attr.port_num = port + 1; attr.dest_qp_num = hr_qp->qpn; memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr), - hr_dev->dev_addr[phy_port], + hr_dev->dev_addr[port], MAC_ADDR_OCTET_NUM); memcpy(&dgid.raw, &subnet_prefix, sizeof(u64)); - memcpy(&dgid.raw[8], hr_dev->dev_addr[phy_port], 3); - memcpy(&dgid.raw[13], hr_dev->dev_addr[phy_port] + 3, 3); + memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3); + memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 3); dgid.raw[11] = 0xff; dgid.raw[12] = 0xfe; dgid.raw[8] ^= 2; rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw); - attr_mask |= IB_QP_PORT; ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, IB_QPS_RESET, IB_QPS_INIT); @@ -812,6 +821,9 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { hr_qp = free_mr->mr_free_qp[i]; + if (!hr_qp) + continue; + ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp); if (ret) dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", @@ -963,7 +975,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; int i; int ret; - int ne; + int ne = 0; mr_work = container_of(work, struct hns_roce_mr_free_work, work); hr_mr = (struct hns_roce_mr *)mr_work->mr; @@ -976,6 +988,10 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { hr_qp = 
free_mr->mr_free_qp[i]; + if (!hr_qp) + continue; + ne++; + ret = hns_roce_v1_send_lp_wqe(hr_qp); if (ret) { dev_err(dev, @@ -985,7 +1001,6 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) } } - ne = HNS_ROCE_V1_RESV_QP; do { ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); if (ret < 0) { -- cgit From 58c4f0d85f59c458074f016c13991c0a81105180 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:22 +0800 Subject: IB/hns: Fix the bug with wild pointer when destroy rc qp When an RC QP is destroyed, hr_qp is used after it has been freed. This patch fixes it. Signed-off-by: Lijun Ou Reported-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c291b2a4c7cf..2fe353001b04 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3657,6 +3657,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) struct hns_roce_dev *hr_dev; struct hns_roce_qp *hr_qp; struct device *dev; + unsigned long qpn; int ret; qp_work_entry = container_of(work, struct hns_roce_qp_work, work); @@ -3664,8 +3665,9 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) dev = &hr_dev->pdev->dev; priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; hr_qp = qp_work_entry->qp; + qpn = hr_qp->qpn; - dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn); + dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn); qp_work_entry->sche_cnt++; @@ -3676,7 +3678,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) &qp_work_entry->db_wait_stage); if (ret) { dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - hr_qp->qpn); + qpn); return; } @@ -3690,7 +3692,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct
work_struct *work) ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn); + dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn); return; } @@ -3699,14 +3701,14 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) if (hr_qp->ibqp.qp_type == IB_QPT_RC) { /* RC QP, release QPN */ - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + hns_roce_release_range_qp(hr_dev, qpn, 1); kfree(hr_qp); } else kfree(hr_to_hr_sqp(hr_qp)); kfree(qp_work_entry); - dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn); + dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn); } int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) -- cgit From 9de61d3fcdde06087f65b4022a1a966c10ab5803 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:23 +0800 Subject: IB/hns: Fix the bug with rdma operation When opcode of work request is RDMA read and write, it should use rdma_wr to get remote_addr and rkey. This patch fixes it. 
Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 2fe353001b04..c42e883a18a8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -228,14 +228,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_READ: ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; - set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, - atomic_wr(wr)->rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; - set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, - atomic_wr(wr)->rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); break; case IB_WR_SEND: case IB_WR_SEND_WITH_INV: -- cgit From d322f004aaa647a5dc9dcddfe5ab1bff1e92f634 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:24 +0800 Subject: IB/hns: Fix the bug with modifying the MAC address without removing the driver When the MAC address is modified using the hns_roce_mac function, we release and create the reserved qp again. It is not necessary to use spin_lock_bh and spin_unlock_bh in handle_en_event; otherwise, an error will occur. This patch mainly fixes it.
Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c3b41f95e70a..d9777b662eba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -125,8 +125,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, return -ENODEV; } - spin_lock_bh(&hr_dev->iboe.lock); - switch (event) { case NETDEV_UP: case NETDEV_CHANGE: @@ -144,7 +142,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, break; } - spin_unlock_bh(&hr_dev->iboe.lock); return 0; } -- cgit From 5f110ac4bed8693adb21146067149a48c2b9bd07 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:25 +0800 Subject: IB/hns: Fix for checkpatch.pl comment style warnings This patch corrects the comment style warnings caught by the checkpatch.pl script.
Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c42e883a18a8..23fad6d96944 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2197,7 +2197,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, } wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; - } else { + } else { /* RQ conrespond to CQE */ wc->byte_len = le32_to_cpu(cqe->byte_cnt); opcode = roce_get_field(cqe->cqe_byte_4, @@ -3549,10 +3549,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, old_cnt = roce_get_field(old_send, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + if (cur_cnt - old_cnt > + SDB_ST_CMP_VAL) { success_flags = 1; - else { - send_ptr = roce_get_field(old_send, + } else { + send_ptr = + roce_get_field(old_send, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + roce_get_field(sdb_retry_cnt, -- cgit From ebc9ca43e1d52a85c72fc2d343f353386ed6c188 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 29 May 2017 17:20:53 -0700 Subject: IB/core: Allow QP state transition from reset to error Playing with IP-O-IB interface can trigger a warning message: "ib0: Failed to modify QP to ERROR state" to be logged. This happens when the QP is in IB_QPS_RESET state and the stack is trying to transition it to IB_QPS_ERR state in ipoib_ib_dev_stop(). According to the IB spec, Table 91 - "QP State Transition Properties" it looks like the transition from reset to error is valid: Transition: Any State to Error Required Attributes: None Optional Attributes: None allowed Actions: Queue processing is stopped. 
Work Requests pending or in process are completed in error, when possible. This patch allows the transition and quiets the message. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7f8fe443df46..fb98ed67d5bc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -895,6 +895,7 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { -- cgit From be8822db62ddda6d316d2dd682679732ed2f0abf Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Fri, 23 Jun 2017 16:03:55 -0500 Subject: i40iw: Fix order of cleanup in close The order for calling i40iw_destroy_pble_pool is incorrect. Also, add PBLE_CHUNK_MEM init state to track pble pool creation and destruction. 
Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index e0f47cc2effc..8fc61b3bd223 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1474,6 +1474,9 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); } /* fallthrough */ + case PBLE_CHUNK_MEM: + i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc); + /* fallthrough */ case CEQ_CREATED: i40iw_dele_ceqs(iwdev, reset); /* fallthrough */ @@ -1489,9 +1492,6 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) case CCQ_CREATED: i40iw_destroy_ccq(iwdev, reset); /* fallthrough */ - case PBLE_CHUNK_MEM: - i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc); - /* fallthrough */ case HMC_OBJS_CREATED: i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset); /* fallthrough */ @@ -1670,6 +1670,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc); if (status) break; + iwdev->init_state = PBLE_CHUNK_MEM; iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM); i40iw_register_notifiers(); iwdev->init_state = INET_NOTIFIER; -- cgit From 415920aa174666c0ac8c47eee974acc9f49efec4 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Fri, 23 Jun 2017 16:03:56 -0500 Subject: i40iw: Do not poll CCQ after it is destroyed Control Queue Pair (CQP) OPs, in this case - Update SDs, cannot poll the Control Completion Queue (CCQ) after CCQ is destroyed. Instead, poll via registers. 
Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index a027e2072477..9ec1ae9a82c9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -1970,6 +1970,8 @@ static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq, ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000); } + cqp->process_cqp_sds = i40iw_update_sds_noccq; + return ret_code; } -- cgit From 6c1d94de4e75160d3ea5af3bf51d290341db1d44 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 23 Jun 2017 16:03:57 -0500 Subject: i40iw: Utilize iwdev->reset during PCI function reset Utilize iwdev->reset on a PCI function reset notification instead of passing in reset flag for resource clean-up. Signed-off-by: Shiraz Saleem Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 51 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 8fc61b3bd223..3bad7d967abe 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -274,13 +274,12 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev, /** * i40iw_destroy_aeq - destroy aeq * @iwdev: iwarp device - * @reset: true if called before reset * * Issue a destroy aeq request and * free the resources associated with the aeq * The function is called during driver unload */ -static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset) +static void i40iw_destroy_aeq(struct i40iw_device *iwdev) { enum i40iw_status_code status = I40IW_ERR_NOT_READY; struct i40iw_sc_dev *dev = &iwdev->sc_dev; @@ 
-288,7 +287,7 @@ static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset) if (!iwdev->msix_shared) i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev); - if (reset) + if (iwdev->reset) goto exit; if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1)) @@ -304,19 +303,17 @@ exit: * i40iw_destroy_ceq - destroy ceq * @iwdev: iwarp device * @iwceq: ceq to be destroyed - * @reset: true if called before reset * * Issue a destroy ceq request and * free the resources associated with the ceq */ static void i40iw_destroy_ceq(struct i40iw_device *iwdev, - struct i40iw_ceq *iwceq, - bool reset) + struct i40iw_ceq *iwceq) { enum i40iw_status_code status; struct i40iw_sc_dev *dev = &iwdev->sc_dev; - if (reset) + if (iwdev->reset) goto exit; status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1); @@ -335,12 +332,11 @@ exit: /** * i40iw_dele_ceqs - destroy all ceq's * @iwdev: iwarp device - * @reset: true if called before reset * * Go through all of the device ceq's and for each ceq * disable the ceq interrupt and destroy the ceq */ -static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset) +static void i40iw_dele_ceqs(struct i40iw_device *iwdev) { u32 i = 0; struct i40iw_sc_dev *dev = &iwdev->sc_dev; @@ -349,32 +345,31 @@ static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset) if (iwdev->msix_shared) { i40iw_disable_irq(dev, msix_vec, (void *)iwdev); - i40iw_destroy_ceq(iwdev, iwceq, reset); + i40iw_destroy_ceq(iwdev, iwceq); iwceq++; i++; } for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) { i40iw_disable_irq(dev, msix_vec, (void *)iwceq); - i40iw_destroy_ceq(iwdev, iwceq, reset); + i40iw_destroy_ceq(iwdev, iwceq); } } /** * i40iw_destroy_ccq - destroy control cq * @iwdev: iwarp device - * @reset: true if called before reset * * Issue destroy ccq request and * free the resources associated with the ccq */ -static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset) +static void i40iw_destroy_ccq(struct i40iw_device 
*iwdev) { struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_ccq *ccq = &iwdev->ccq; enum i40iw_status_code status = 0; - if (!reset) + if (!iwdev->reset) status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true); if (status) i40iw_pr_err("ccq destroy failed %d\n", status); @@ -810,7 +805,7 @@ static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev, iwceq->msix_idx = msix_vec->idx; status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec); if (status) { - i40iw_destroy_ceq(iwdev, iwceq, false); + i40iw_destroy_ceq(iwdev, iwceq); break; } i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx); @@ -912,7 +907,7 @@ static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev) status = i40iw_configure_aeq_vector(iwdev); if (status) { - i40iw_destroy_aeq(iwdev, false); + i40iw_destroy_aeq(iwdev); return status; } @@ -1442,12 +1437,11 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev, /** * i40iw_deinit_device - clean up the device resources * @iwdev: iwarp device - * @reset: true if called before reset * * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses, * destroy the device queues and free the pble and the hmc objects */ -static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) +static void i40iw_deinit_device(struct i40iw_device *iwdev) { struct i40e_info *ldev = iwdev->ldev; @@ -1464,7 +1458,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) i40iw_destroy_rdma_device(iwdev->iwibdev); /* fallthrough */ case IP_ADDR_REGISTERED: - if (!reset) + if (!iwdev->reset) i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case INET_NOTIFIER: @@ -1478,22 +1472,22 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc); /* fallthrough */ case CEQ_CREATED: - i40iw_dele_ceqs(iwdev, reset); + i40iw_dele_ceqs(iwdev); /* fallthrough */ case AEQ_CREATED: - 
i40iw_destroy_aeq(iwdev, reset); + i40iw_destroy_aeq(iwdev); /* fallthrough */ case IEQ_CREATED: - i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset); + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, iwdev->reset); /* fallthrough */ case ILQ_CREATED: - i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset); + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, iwdev->reset); /* fallthrough */ case CCQ_CREATED: - i40iw_destroy_ccq(iwdev, reset); + i40iw_destroy_ccq(iwdev); /* fallthrough */ case HMC_OBJS_CREATED: - i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset); + i40iw_del_hmc_objects(dev, dev->hmc_info, true, iwdev->reset); /* fallthrough */ case CQP_CREATED: i40iw_destroy_cqp(iwdev, true); @@ -1694,7 +1688,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) } while (0); i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state); - i40iw_deinit_device(iwdev, false); + i40iw_deinit_device(iwdev); return -ERESTART; } @@ -1775,9 +1769,12 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool iwdev = &hdl->device; iwdev->closing = true; + if (reset) + iwdev->reset = true; + i40iw_cm_disconnect_all(iwdev); destroy_workqueue(iwdev->virtchnl_wq); - i40iw_deinit_device(iwdev, reset); + i40iw_deinit_device(iwdev); } /** -- cgit From 6327cb09dfda103f7255ef218ac18697b293554a Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 23 Jun 2017 16:03:58 -0500 Subject: i40iw: Release cm_id ref on PCI function reset On PCI function reset, cm_id reference is not released which causes an application hang, as it waits on the cm_id to be released on rdma_destroy. To fix this, call i40iw_cm_disconn during a PCI function reset to clean-up resources and release cm_id reference. 
Signed-off-by: Shiraz Saleem Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 6ae98aa7f74e..5a2fa743676c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3487,7 +3487,8 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp) if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) || (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) || (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) || - (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) { + (last_ae == I40IW_AE_LLP_CONNECTION_RESET) || + iwdev->reset)) { issue_close = 1; iwqp->cm_id = NULL; if (!iwqp->flush_issued) { @@ -4265,6 +4266,8 @@ void i40iw_cm_disconnect_all(struct i40iw_device *iwdev) cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry); attr.qp_state = IB_QPS_ERR; i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); + if (iwdev->reset) + i40iw_cm_disconn(cm_node->iwqp); i40iw_rem_ref_cm_node(cm_node); } } -- cgit From b5e452a04a10f12763f9836d3d3999f3bb1e56fb Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 23 Jun 2017 16:03:59 -0500 Subject: i40iw: Free QP resources on CQP destroy QP failure Current flow leaves software QP structures in memory if Control Queue Pair (CQP) destroy QP OP fails. To fix this, free QP resources on fail of CQP destroy QP OP. 
Signed-off-by: Shiraz Saleem Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 56d986924a4c..ded8e48ed3c8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -546,8 +546,12 @@ void i40iw_rem_ref(struct ib_qp *ibqp) cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; cqp_info->in.u.qp_destroy.remove_hash_idx = true; status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (status) - i40iw_pr_err("CQP-OP Destroy QP fail"); + if (!status) + return; + + i40iw_rem_pdusecount(iwqp->iwpd, iwdev); + i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_rem_devusecount(iwdev); } /** -- cgit From c5c9d27e6c79ab3ab36092fe67fb7f2c6a120171 Mon Sep 17 00:00:00 2001 From: Henry Orosco Date: Fri, 23 Jun 2017 16:04:00 -0500 Subject: i40iw: Add missing memory barrier Add missing write memory barrier before writing the header containing valid bit to the WQE in i40iw_puda_send. 
Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_puda.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index db41ab40da9c..1bb16814560f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -408,6 +408,9 @@ enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp, set_64bit_val(wqe, 0, info->paddr); set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN)); set_64bit_val(wqe, 16, header[0]); + + /* Ensure all data is written before writing valid bit */ + wmb(); set_64bit_val(wqe, 24, header[1]); i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32); -- cgit From c709d7f229a273c7c5664e9dfe5432b031842d0c Mon Sep 17 00:00:00 2001 From: Henry Orosco Date: Fri, 23 Jun 2017 16:04:01 -0500 Subject: i40iw: Update list correctly To avoid infinite loop, in i40iw_ieq_handle_exception, update plist inside while loop. 
Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_puda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 1bb16814560f..71050c5d29a0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -1414,10 +1414,10 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, if (!list_empty(rxlist)) { tmpbuf = (struct i40iw_puda_buf *)rxlist->next; - plist = &tmpbuf->list; while ((struct list_head *)tmpbuf != rxlist) { if ((int)(buf->seqnum - tmpbuf->seqnum) < 0) break; + plist = &tmpbuf->list; tmpbuf = (struct i40iw_puda_buf *)plist->next; } /* Insert buf before tmpbuf */ -- cgit From 44b99f88cdd5b47046c511aa64ae71ad2c9e5b1e Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 23 Jun 2017 16:04:02 -0500 Subject: i40iw: Avoid memory leak of CQP request objects Control Queue Pair (CQP) request objects, which have not received a completion upon interface close, remain in memory. To fix this, identify and free all pending CQP request objects during destroy CQP OP. 
Signed-off-by: Shiraz Saleem Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 + drivers/infiniband/hw/i40iw/i40iw_main.c | 2 ++ drivers/infiniband/hw/i40iw/i40iw_utils.c | 52 +++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index da2eb5a281fa..9b1566468744 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -527,6 +527,7 @@ enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev, int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq); +void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev); void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev); void i40iw_add_pdusecount(struct i40iw_pd *iwpd); void i40iw_rem_devusecount(struct i40iw_device *iwdev); diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 3bad7d967abe..ae8463ff59a7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -243,6 +243,8 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) if (free_hwcqp) dev->cqp_ops->cqp_destroy(dev->cqp); + i40iw_cleanup_pending_cqp_op(iwdev); + i40iw_free_dma_mem(dev->hw, &cqp->sq); kfree(cqp->scratch_array); iwdev->cqp.scratch_array = NULL; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index ded8e48ed3c8..e311ec559f4e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -337,6 +337,7 @@ struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait */ void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request) { + struct 
i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp); unsigned long flags; if (cqp_request->dynamic) { @@ -350,6 +351,7 @@ void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs); spin_unlock_irqrestore(&cqp->req_lock, flags); } + wake_up(&iwdev->close_wq); } /** @@ -364,6 +366,56 @@ void i40iw_put_cqp_request(struct i40iw_cqp *cqp, i40iw_free_cqp_request(cqp, cqp_request); } +/** + * i40iw_free_pending_cqp_request -free pending cqp request objs + * @cqp: cqp ptr + * @cqp_request: to be put back in cqp list + */ +static void i40iw_free_pending_cqp_request(struct i40iw_cqp *cqp, + struct i40iw_cqp_request *cqp_request) +{ + struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp); + + if (cqp_request->waiting) { + cqp_request->compl_info.error = true; + cqp_request->request_done = true; + wake_up(&cqp_request->waitq); + } + i40iw_put_cqp_request(cqp, cqp_request); + wait_event_timeout(iwdev->close_wq, + !atomic_read(&cqp_request->refcount), + 1000); +} + +/** + * i40iw_cleanup_pending_cqp_op - clean-up cqp with no completions + * @iwdev: iwarp device + */ +void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev) +{ + struct i40iw_sc_dev *dev = &iwdev->sc_dev; + struct i40iw_cqp *cqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request = NULL; + struct cqp_commands_info *pcmdinfo = NULL; + u32 i, pending_work, wqe_idx; + + pending_work = I40IW_RING_WORK_AVAILABLE(cqp->sc_cqp.sq_ring); + wqe_idx = I40IW_RING_GETCURRENT_TAIL(cqp->sc_cqp.sq_ring); + for (i = 0; i < pending_work; i++) { + cqp_request = (struct i40iw_cqp_request *)(unsigned long)cqp->scratch_array[wqe_idx]; + if (cqp_request) + i40iw_free_pending_cqp_request(cqp, cqp_request); + wqe_idx = (wqe_idx + 1) % I40IW_RING_GETSIZE(cqp->sc_cqp.sq_ring); + } + + while (!list_empty(&dev->cqp_cmd_head)) { + pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head); + cqp_request = 
container_of(pcmdinfo, struct i40iw_cqp_request, info); + if (cqp_request) + i40iw_free_pending_cqp_request(cqp, cqp_request); + } +} + /** * i40iw_free_qp - callback after destroy cqp completes * @cqp_request: cqp request for destroy qp -- cgit From af56e53ccd29bda062a1ae75276dc9c0f8eedf47 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 5 Jul 2017 21:25:33 -0500 Subject: i40iw: Free QP PBLEs when the QP is destroyed If the physical buffer list entries (PBLEs) of a QP are freed up at i40iw_dereg_mr, they can be assigned to a newly created QP before the previous QP is destroyed. Fix this by freeing PBLEs only when the QP is destroyed. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 15 +++++++++++---- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 4dbe61ec7a77..4aa0264ccb3a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -426,9 +426,13 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, u32 qp_num) { + struct i40iw_pbl *iwpbl = &iwqp->iwpbl; + i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); if (qp_num) i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); + if (iwpbl->pbl_allocated) + i40iw_free_pble(iwdev->pble_rsrc, &iwpbl->pble_alloc); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.wrid_mem); @@ -483,7 +487,7 @@ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, struct i40iw_qp_init_info *init_info) { - struct i40iw_pbl *iwpbl = iwqp->iwpbl; + struct i40iw_pbl *iwpbl = &iwqp->iwpbl; struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr; iwqp->page = qpmr->sq_page; @@ -688,19 +692,22 @@ 
static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, ucontext = to_ucontext(ibpd->uobject->context); if (req.user_wqe_buffers) { + struct i40iw_pbl *iwpbl; + spin_lock_irqsave( &ucontext->qp_reg_mem_list_lock, flags); - iwqp->iwpbl = i40iw_get_pbl( + iwpbl = i40iw_get_pbl( (unsigned long)req.user_wqe_buffers, &ucontext->qp_reg_mem_list); spin_unlock_irqrestore( &ucontext->qp_reg_mem_list_lock, flags); - if (!iwqp->iwpbl) { + if (!iwpbl) { err_code = -ENODATA; i40iw_pr_err("no pbl info\n"); goto error; } + memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); } } err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info); @@ -2063,7 +2070,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) ucontext = to_ucontext(ibpd->uobject->context); i40iw_del_memlist(iwmr, ucontext); } - if (iwpbl->pbl_allocated) + if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP) i40iw_free_pble(iwdev->pble_rsrc, palloc); kfree(iwmr); return 0; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 07c3fec77de6..9067443cd311 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -170,7 +170,7 @@ struct i40iw_qp { struct i40iw_qp_kmode kqp; struct i40iw_dma_mem host_ctx; struct timer_list terminate_timer; - struct i40iw_pbl *iwpbl; + struct i40iw_pbl iwpbl; struct i40iw_dma_mem q2_ctx_mem; struct i40iw_dma_mem ietf_mem; struct completion sq_drained; -- cgit From 653f0a71daf1a71d55d3af368c28c8114c11c607 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Jul 2017 10:22:47 +0300 Subject: RDMA/bnxt_re: checking for NULL instead of IS_ERR() bnxt_re_alloc_mw() doesn't return NULL, it returns error pointers. 
Fixes: 9152e0b722b2 ("RDMA/bnxt_re: HW workarounds for handling specific conditions") Signed-off-by: Dan Carpenter Acked-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index c7bd68311d0c..e794b0fa4ae6 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -588,10 +588,10 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) /* Create a fence MW only for kernel consumers */ mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL); - if (!mw) { + if (IS_ERR(mw)) { dev_err(rdev_to_dev(rdev), "Failed to create fence-MW for PD: %p\n", pd); - rc = -EINVAL; + rc = PTR_ERR(mw); goto fail; } fence->mw = mw; -- cgit From 5c8857b653e71a9850a02837e1268e3198abbd1a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:45:48 +0300 Subject: IB/IPoIB: Fix error code in ipoib_add_port() We accidentally don't set the error code on some of these error paths. It means we return ERR_PTR(0) which is NULL and it results in a NULL dereference in the caller. This bug dates to pre-git days.
Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 70dacaf9044e..4ce315c92b48 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2239,6 +2239,7 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } + result = -ENOMEM; if (ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) -- cgit From 6031e079aa4656743298ea235b894ee883f45c71 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:47:22 +0300 Subject: IB/i40iw: Fix error code in i40iw_create_cq() We accidentally forgot to set the error code if ib_copy_from_udata() fails. It means we return ERR_PTR(0) which is NULL and results in a NULL dereference in the callers. 
Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Dan Carpenter Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 4aa0264ccb3a..02d871db7ca5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1168,8 +1168,10 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, memset(&req, 0, sizeof(req)); iwcq->user_mode = true; ucontext = to_ucontext(context); - if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) + if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) { + err_code = -EFAULT; goto cq_free_resources; + } spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer, -- cgit From 6ebedacbb44602d4dec3348dee5ec31dd9b09521 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:47:40 +0300 Subject: cxgb4: Fix error codes in c4iw_create_cq() If one of these kmalloc() calls fails then we return ERR_PTR(0) which is NULL. It results in a NULL dereference in the callers. 
Fixes: cfdda9d76436 ("RDMA/cxgb4: Add driver for Chelsio T4 RNIC") Signed-off-by: Dan Carpenter Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e16fcaf6b5a3..be07da1997e6 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -963,6 +963,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, goto err3; if (ucontext) { + ret = -ENOMEM; mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) goto err4; -- cgit From 9064d6055c14f700aa13f7c72fd3e63d12bee643 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:48:00 +0300 Subject: IB/cxgb3: Fix error codes in iwch_alloc_mr() We accidentally don't set the error code on some error paths. It means return ERR_PTR(0) which is NULL and results in a NULL dereference in the caller. Fixes: 13a239330abd ("RDMA/cxgb3: Don't ignore insert_handle() failures") Signed-off-by: Dan Carpenter Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 29d30744d6c9..0cd0c1fa27d4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -718,7 +718,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, struct iwch_mr *mhp; u32 mmid; u32 stag = 0; - int ret = 0; + int ret = -ENOMEM; if (mr_type != IB_MR_TYPE_MEM_REG || max_num_sg > T3_MAX_FASTREG_DEPTH) @@ -731,10 +731,8 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, goto err; mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mhp->pages) { - ret = -ENOMEM; + if (!mhp->pages) goto pl_err; - } mhp->rhp = rhp; ret = iwch_alloc_pbl(mhp, 
max_num_sg); @@ -751,7 +749,8 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, mhp->attr.state = 1; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) + ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid); + if (ret) goto err3; pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); -- cgit From dd75cfa6d3216c79c695f5af13e52208afe374ad Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:46:14 +0300 Subject: RDMA/ocrdma: Fix an error code in ocrdma_alloc_pd() We should preserve the original "status" error code instead of resetting it to zero. Returning ERR_PTR(0) is the same as NULL and results in a NULL dereference in the callers. I added a printk() on error instead. Fixes: 45e86b33ec8b ("RDMA/ocrdma: Cache recv DB until QP moved to RTR") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2f30bda8457a..cc317e858040 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -744,7 +744,8 @@ err: if (is_uctx_pd) { ocrdma_release_ucontext_pd(uctx); } else { - status = _ocrdma_dealloc_pd(dev, pd); + if (_ocrdma_dealloc_pd(dev, pd)) + pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__); } exit: return ERR_PTR(status); -- cgit From f0c6e88288d65c93bbc7da4fb6f7d51b2733228a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:46:49 +0300 Subject: RDMA/ocrdma: Fix error codes in ocrdma_create_srq() If either of these allocations fail then we return ERR_PTR(0). That's equivalent to NULL and results in a NULL pointer dereference in the caller. 
Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index cc317e858040..27d5e8d9f08d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1902,6 +1902,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, goto err; if (udata == NULL) { + status = -ENOMEM; srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt, GFP_KERNEL); if (srq->rqe_wr_id_tbl == NULL) -- cgit From 396551eb00e46aa8f843c448bced0c76971ec58c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:20:09 +0300 Subject: IB/mlx5: Fix a warning message "umem" is a valid pointer. We intended to print "*umem" or even just "err" instead. Signed-off-by: Dan Carpenter Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8ab2f1360a45..2c40a2e989d2 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -835,7 +835,7 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, access_flags, 0); err = PTR_ERR_OR_ZERO(*umem); if (err < 0) { - mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); + mlx5_ib_err(dev, "umem get failed (%d)\n", err); return err; } -- cgit From b3b2c7c5506bd8d15214136f8b11a2e6c9728033 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 29 Jun 2017 12:28:08 -0700 Subject: RDMA/bnxt_re: Free doorbell page index (DPI) during dealloc ucontext The driver must free the DPI during the dealloc_ucontext instead of freeing it during dealloc_pd. 
However, the DPI allocation scheme remains unchanged. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 58 ++++++++++++++++---------------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 +- 2 files changed, 30 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e794b0fa4ae6..e743ffd392c6 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -612,30 +612,13 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd) int rc; bnxt_re_destroy_fence_mr(pd); - if (ib_pd->uobject && pd->dpi.dbr) { - struct ib_ucontext *ib_uctx = ib_pd->uobject->context; - struct bnxt_re_ucontext *ucntx; - /* Free DPI only if this is the first PD allocated by the - * application and mark the context dpi as NULL - */ - ucntx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx); - - rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res, - &rdev->qplib_res.dpi_tbl, - &pd->dpi); + if (pd->qplib_pd.id) { + rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res, + &rdev->qplib_res.pd_tbl, + &pd->qplib_pd); if (rc) - dev_err(rdev_to_dev(rdev), "Failed to deallocate HW DPI"); - /* Don't fail, continue*/ - ucntx->dpi = NULL; - } - - rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res, - &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD"); - return rc; + dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD"); } kfree(pd); @@ -667,23 +650,22 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, if (udata) { struct bnxt_re_pd_resp resp; - if (!ucntx->dpi) { + if (!ucntx->dpi.dbr) { /* Allocate DPI in alloc_pd to avoid failing of * ibv_devinfo and family of application when DPIs * are depleted. 
*/ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl, - &pd->dpi, ucntx)) { + &ucntx->dpi, ucntx)) { rc = -ENOMEM; goto dbfail; } - ucntx->dpi = &pd->dpi; } resp.pdid = pd->qplib_pd.id; /* Still allow mapping this DBR to the new user PD. */ - resp.dpi = ucntx->dpi->dpi; - resp.dbr = (u64)ucntx->dpi->umdbr; + resp.dpi = ucntx->dpi.dpi; + resp.dbr = (u64)ucntx->dpi.umdbr; rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (rc) { @@ -960,7 +942,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, qplib_qp->rq.nmap = umem->nmap; } - qplib_qp->dpi = cntx->dpi; + qplib_qp->dpi = &cntx->dpi; return 0; rqfail: ib_umem_release(qp->sumem); @@ -2403,7 +2385,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, } cq->qplib_cq.sghead = cq->umem->sg_head.sgl; cq->qplib_cq.nmap = cq->umem->nmap; - cq->qplib_cq.dpi = uctx->dpi; + cq->qplib_cq.dpi = &uctx->dpi; } else { cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL); cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe), @@ -3388,8 +3370,26 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) struct bnxt_re_ucontext *uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx); + + struct bnxt_re_dev *rdev = uctx->rdev; + int rc = 0; + if (uctx->shpg) free_page((unsigned long)uctx->shpg); + + if (uctx->dpi.dbr) { + /* Free DPI only if this is the first PD allocated by the + * application and mark the context dpi as NULL + */ + rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res, + &rdev->qplib_res.dpi_tbl, + &uctx->dpi); + if (rc) + dev_err(rdev_to_dev(rdev), "Deallocte HW DPI failed!"); + /* Don't fail, continue*/ + uctx->dpi.dbr = NULL; + } + kfree(uctx); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 6c160f6a5398..a0bb7e33d7ca 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -59,7 +59,6 @@ struct bnxt_re_pd { struct bnxt_re_dev *rdev; struct ib_pd 
ib_pd; struct bnxt_qplib_pd qplib_pd; - struct bnxt_qplib_dpi dpi; struct bnxt_re_fence_data fence; }; @@ -127,7 +126,7 @@ struct bnxt_re_mw { struct bnxt_re_ucontext { struct bnxt_re_dev *rdev; struct ib_ucontext ib_uctx; - struct bnxt_qplib_dpi *dpi; + struct bnxt_qplib_dpi dpi; void *shpg; spinlock_t sh_lock; /* protect shpg */ }; -- cgit From ab69d4c8da38024191f3514c1296e9e8deea4e98 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 29 Jun 2017 12:28:09 -0700 Subject: RDMA/bnxt_re: Fix WQE Size posted to HW to prevent it from throwing error Posting WQE size of 2 results in a WQE_FORMAT_ERROR thrown by the HW as it requires host to supply WQE Size with room for atleast one SGE so that the resulting WQE size be atleast 3. Signed-off-by: Somnath Kotur Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f05500bcdcf1..8ef39df2f44f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1128,6 +1128,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } /* Each SGE entry = 1 WQE size16 */ wqe_size16 = wqe->num_sge; + /* HW requires wqe size has room for atleast one SGE even if + * none was supplied by ULP + */ + if (!wqe->num_sge) + wqe_size16++; } /* Specifics */ @@ -1364,6 +1369,11 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, rqe->flags = wqe->flags; rqe->wqe_size = wqe->num_sge + ((offsetof(typeof(*rqe), data) + 15) >> 4); + /* HW requires wqe size has room for atleast one SGE even if none + * was supplied by ULP + */ + if (!wqe->num_sge) + rqe->wqe_size++; /* Supply the rqe->wr_id index to the wr_id_tbl for now */ rqe->wr_id[0] = cpu_to_le32(sw_prod); -- cgit From 4a62c5e9e2e1c15ceb1654715d9284d97f921119 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 29 Jun 
2017 12:28:11 -0700 Subject: RDMA/bnxt_re: Do not free the ctx_tbl entry if delete GID fails This fix is added only to avoid a system crash in a specific scenario. When bnxt_re driver is loaded and if user tries to change interface mac address, delete GID fails because QP1 is still associated with existing MAC (default GID). If the above command fails GID tables are not modified in the h/w or driver, but the GID context memory is freed. Now, if the user changes the mac back to the original value, another add_gid comes to the driver where the driver reports that the GID is already present in its table and tries to access the context which was already freed. So, in this case, in order to avoid NULL pointer de-reference, this patch removes the context memory free if delete_gid fails and the same context memory is re-used in new add_gid. Memory cleanup will be taken care of during driver unload, while deleting the GID table. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e743ffd392c6..0cd8372989ce 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -390,15 +390,17 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, return -EINVAL; ctx->refcnt--; if (!ctx->refcnt) { - rc = bnxt_qplib_del_sgid - (sgid_tbl, - &sgid_tbl->tbl[ctx->idx], true); - if (rc) + rc = bnxt_qplib_del_sgid(sgid_tbl, + &sgid_tbl->tbl[ctx->idx], + true); + if (rc) { dev_err(rdev_to_dev(rdev), "Failed to remove GID: %#x", rc); - ctx_tbl = sgid_tbl->ctx; - ctx_tbl[ctx->idx] = NULL; - kfree(ctx); + } else { + ctx_tbl = sgid_tbl->ctx; + ctx_tbl[ctx->idx] = NULL; + kfree(ctx); + } } } else { return -EINVAL; -- cgit From 58d4a671d0eac45db1c7f27c8684c277249ac127 Mon
Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 29 Jun 2017 12:28:12 -0700 Subject: RDMA/bnxt_re: Report supported value to IB stack in query_device - Report supported value for max_mr_size to IB stack in query_device. Also, check and log if MR size requested by application in reg_user_mr() is greater than value currently supported by driver. - Report only 4K page size support for now - Fix Max_QP value returned by ibv_devinfo -vv. In case of PF, FW reserves 129 QPs for creating QP1s of VFs and PF. So the max_qp value reported by FW for PF doesn't include the QP1. Fixing this issue by adding 1 to the value reported by FW. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 ++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 12 ++++++++---- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2 ++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 08772836fded..4cb257dc61be 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -51,6 +51,8 @@ #define BNXT_RE_PAGE_SIZE_8M BIT(23) #define BNXT_RE_PAGE_SIZE_1G BIT(30) +#define BNXT_RE_MAX_MR_SIZE BIT(30) + #define BNXT_RE_MAX_QPC_COUNT (64 * 1024) #define BNXT_RE_MAX_MRW_COUNT (64 * 1024) #define BNXT_RE_MAX_SRQC_COUNT (64 * 1024) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0cd8372989ce..4e3e5b91d855 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -145,10 +145,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver); bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ib_attr->sys_image_guid); - ib_attr->max_mr_size = ~0ull; - ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_8K | - BNXT_RE_PAGE_SIZE_64K |
BNXT_RE_PAGE_SIZE_2M | - BNXT_RE_PAGE_SIZE_8M | BNXT_RE_PAGE_SIZE_1G; + ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; + ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K; ib_attr->vendor_id = rdev->en_dev->pdev->vendor; ib_attr->vendor_part_id = rdev->en_dev->pdev->device; @@ -3229,6 +3227,12 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct scatterlist *sg; int entry; + if (length > BNXT_RE_MAX_MR_SIZE) { + dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n", + length, BNXT_RE_MAX_MR_SIZE); + return ERR_PTR(-ENOMEM); + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index fde18cf0e406..5827573875d4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -81,6 +81,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, /* Extract the context from the side buffer */ attr->max_qp = le32_to_cpu(sb->max_qp); + /* max_qp value reported by FW for PF doesn't include the QP1 for PF */ + attr->max_qp += 1; attr->max_qp_rd_atom = sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom; -- cgit From a25d112fe9c8e8817cde1df17a82aee472c55993 Mon Sep 17 00:00:00 2001 From: Eddie Wai Date: Thu, 29 Jun 2017 12:28:13 -0700 Subject: RDMA/bnxt_re: Fixed the max_rd_atomic support for initiator and destination QP There's a couple of bugs in the support of max_rd_atomic and max_dest_rd_atomic. In the modify_qp, if the requested max_rd_atomic, which is the ORRQ size, is greater than what the chip can support, then we have to cap the request to chip max as we can't have the HW overflow the ORRQ. Capping the max_rd_atomic support internally is okay to do as the remaining read/atomic WRs will still be sitting in the SQ. 
However, for the max_dest_rd_atomic, the driver has to error out as this dictates the IRRQ size and we can't control what the remote side sends. Signed-off-by: Eddie Wai Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4e3e5b91d855..4d3cdca03c02 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -172,7 +172,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->max_mr = dev_attr->max_mr; ib_attr->max_pd = dev_attr->max_pd; ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom; - ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_rd_atom; + ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom; ib_attr->atomic_cap = IB_ATOMIC_HCA; ib_attr->masked_atomic_cap = IB_ATOMIC_HCA; @@ -1512,13 +1512,24 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC; - qp->qplib_qp.max_rd_atomic = qp_attr->max_rd_atomic; + /* Cap the max_rd_atomic to device max */ + qp->qplib_qp.max_rd_atomic = min_t(u32, qp_attr->max_rd_atomic, + dev_attr->max_qp_rd_atom); } if (qp_attr_mask & IB_QP_SQ_PSN) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN; qp->qplib_qp.sq.psn = qp_attr->sq_psn; } if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (qp_attr->max_dest_rd_atomic > + dev_attr->max_qp_init_rd_atom) { + dev_err(rdev_to_dev(rdev), + "max_dest_rd_atomic requested%d is > dev_max%d", + qp_attr->max_dest_rd_atomic, + dev_attr->max_qp_init_rd_atom); + return -EINVAL; + } + qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC; qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic; -- cgit From 
536f092805cecc5c2c0ba7f051a7552619bd4491 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 29 Jun 2017 12:28:14 -0700 Subject: RDMA/bnxt_re: Specify RDMA component when allocating stats context Starting FW version 20.6.47, firmware is keeping separate statistics for L2 and RDMA. However, driver needs to specify RDMA or not when allocating stat_ctx. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 1fce5e73216b..ceae2d92fb08 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -333,6 +333,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); + req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); -- cgit From 254cd2590d408f2c0375fbf73ef8250d47529e7b Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 29 Jun 2017 12:28:16 -0700 Subject: RDMA/bnxt_re: Enable atomics only if host bios supports Driver shall check if the host system bios has enabled Atomic operations capability in PCI Device Control 2 register of the pci-device. Expose the ATOMIC_HCA flag only if the Atomic operations capability is set. 
Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 ++++-- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 14 ++++++++++++++ drivers/infiniband/hw/bnxt_re/qplib_sp.h | 3 +++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4d3cdca03c02..7b17030d2696 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -173,8 +173,10 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->max_pd = dev_attr->max_pd; ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom; ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom; - ib_attr->atomic_cap = IB_ATOMIC_HCA; - ib_attr->masked_atomic_cap = IB_ATOMIC_HCA; + if (dev_attr->is_atomic) { + ib_attr->atomic_cap = IB_ATOMIC_HCA; + ib_attr->masked_atomic_cap = IB_ATOMIC_HCA; + } ib_attr->max_ee_rd_atom = 0; ib_attr->max_res_rd_atom = 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 5827573875d4..ef91ab786dd4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -51,6 +51,19 @@ const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /* Device */ + +static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) +{ + int rc; + u16 pcie_ctl2; + + rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, + &pcie_ctl2); + if (rc) + return false; + return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ); +} + int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_dev_attr *attr) { @@ -131,6 +144,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc); } + attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw); bail: bnxt_qplib_rcfw_free_sbuf(rcfw, 
sbuf); return rc; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index a543f959098b..2ce7e2a32cf0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -42,6 +42,8 @@ #define BNXT_QPLIB_RESERVED_QP_WRS 128 +#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 + struct bnxt_qplib_dev_attr { char fw_ver[32]; u16 max_sgid; @@ -70,6 +72,7 @@ struct bnxt_qplib_dev_attr { u32 max_inline_data; u32 l2_db_size; u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ]; + bool is_atomic; }; struct bnxt_qplib_pd { -- cgit From 10d1dedf9b428ed776f244eb8b43f28974da3fb0 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 29 Jun 2017 12:28:17 -0700 Subject: RDMA/bnxt_re: Fix return value of poll routine Fix the incorrect reporting of number of polled entries by taking into account the max CQ depth in the driver. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 7b17030d2696..b92a06d17186 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2900,6 +2900,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->cq_lock, flags); budget = min_t(u32, num_entries, cq->max_cql); + num_entries = budget; if (!cq->cql) { dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use"); goto exit; -- cgit From 499e456981d376ef614e257d5dcb280581db9ac6 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 29 Jun 2017 12:28:18 -0700 Subject: RDMA/bnxt_re: Report MISSED_EVENTS in req_notify_cq While invoking the req_notify_cq hook, ULPs can request whether the CQs have any CQEs pending. 
If CQEs are pending, drivers can indicate it by returning 1 for req_notify_cq. The stack will poll CQ again till CQ is empty. This patch peeks the CQ for any valid entries and return accordingly. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 +++++ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 19 +++++++++++++++++++ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1 + 3 files changed, 25 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b92a06d17186..d5aa5a1a7e00 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3027,6 +3027,11 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq, else if (ib_cqn_flags & IB_CQ_SOLICITED) type = DBR_DBR_TYPE_CQ_ARMSE; + /* Poll to see if there are missed events */ + if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) && + !(bnxt_qplib_is_cq_empty(&cq->qplib_cq))) + return 1; + bnxt_qplib_req_notify_cq(&cq->qplib_cq, type); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 8ef39df2f44f..9af1514e5944 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1895,6 +1895,25 @@ flush_rq: return rc; } +bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) +{ + struct cq_base *hw_cqe, **hw_cqe_ptr; + unsigned long flags; + u32 sw_cons, raw_cons; + bool rc = true; + + spin_lock_irqsave(&cq->hwq.lock, flags); + raw_cons = cq->hwq.cons; + sw_cons = HWQ_CMP(raw_cons, &cq->hwq); + hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; + hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; + + /* Check for Valid bit. 
If the CQE is valid, return false */ + rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); + spin_unlock_irqrestore(&cq->hwq.lock, flags); + return rc; +} + static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, struct cq_res_raweth_qp1 *hwcqe, struct bnxt_qplib_cqe **pcqe, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 36b7b7db0e3f..19176e06c98a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -449,6 +449,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq); int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq); int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num, struct bnxt_qplib_qp **qp); +bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq); void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type); void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq); -- cgit From 601577b7d14cba5848b4c44ef719881f8538f702 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 29 Jun 2017 12:28:19 -0700 Subject: RDMA/bnxt_re: Fix the value reported for local ack delay Local ack delay exposed by the driver is 0 which means infinite QP timeout. 
Reporting the default value to 16 (approx 260ms) Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 7 +++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 4cb257dc61be..85527532c49d 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -62,6 +62,13 @@ #define BNXT_RE_RQ_WQE_THRESHOLD 32 +/* + * Setting the default ack delay value to 16, which means + * the default timeout is approx. 260ms(4 usec * 2 ^(timeout)) + */ + +#define BNXT_RE_DEFAULT_ACK_DELAY 16 + struct bnxt_re_work { struct work_struct work; unsigned long event; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d5aa5a1a7e00..f0e01b3ac711 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -201,7 +201,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS; ib_attr->max_pkeys = 1; - ib_attr->local_ca_ack_delay = 0; + ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY; return 0; } -- cgit From 266098b841d48f7f0db40424bdbc072e4db14e9b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 8 Jun 2017 17:30:47 +0300 Subject: IB/core: Fix sparse warnings Delete unused variables to prevent sparse warnings. 
Fixes: db1b5ddd5336 ("IB/core: Rename uverbs event file structure") Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema") Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3f55d18a3791..71451eae42de 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1296,7 +1296,6 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_cq *cq; struct ib_ucq_object *obj; - struct ib_uverbs_event_queue *ev_queue; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1313,7 +1312,6 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, */ uverbs_uobject_get(uobj); cq = uobj->object; - ev_queue = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); memset(&resp, 0, sizeof(resp)); @@ -2088,7 +2086,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; - struct ib_qp *qp; struct ib_uqp_object *obj; int ret = -EINVAL; @@ -2102,7 +2099,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (IS_ERR(uobj)) return PTR_ERR(uobj); - qp = uobj->object; obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); /* * Make sure we don't free the memory in remove_commit as we still @@ -3004,7 +3000,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, { struct ib_uverbs_ex_destroy_wq cmd = {}; struct ib_uverbs_ex_destroy_wq_resp resp = {}; - struct ib_wq *wq; struct ib_uobject *uobj; struct ib_uwq_object *obj; size_t required_cmd_sz; @@ -3038,7 +3033,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, if (IS_ERR(uobj)) return PTR_ERR(uobj); - wq = uobj->object; obj = container_of(uobj, struct ib_uwq_object, 
uevent.uobject); /* * Make sure we don't free the memory in remove_commit as we still @@ -3728,10 +3722,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; - struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; - enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -3741,9 +3733,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (IS_ERR(uobj)) return PTR_ERR(uobj); - srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); - srq_type = srq->srq_type; /* * Make sure we don't free the memory in remove_commit as we still * needs the uobject memory to create the response. -- cgit From a25ce4270bfdd522207b02f81a594c7d1746b697 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sat, 17 Jun 2017 10:37:26 -0700 Subject: IB/rdmavt: Setting of QP timeout can overflow jiffies computation Current computation of qp->timeout_jiffies in rvt_modify_qp() will cause overflow due to the fact that the input to the function usecs_to_jiffies is only 32-bit ( unsigned int). Overflow will occur when attr->timeout is equal to or greater than 30. The consequence is unnecessarily excessive retry and thus degradation of the system performance. This patch fixes the problem by limiting the input to 5-bit and calling usecs_to_jiffies() before multiplying the scaling factor. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 459865439a0b..8876ee7bc326 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1258,9 +1258,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_TIMEOUT) { qp->timeout = attr->timeout; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); + qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout); } if (attr_mask & IB_QP_QKEY) -- cgit From 4542e3c79a2c5a167cbeb4f4190d5f705d272002 Mon Sep 17 00:00:00 2001 From: Håkon Bugge Date: Tue, 20 Jun 2017 14:07:50 +0200 Subject: IB/mlx4: Fix CM REQ retries in paravirt mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CM REQs cannot be successfully retried, because a new pv_cm_id is created for each request, without checking if one already exists. By checking if an id exists before creating one, the bug is fixed. This bug can be provoked by running an RDMA CM user-land application, but inserting a five seconds delay before the rdma_accept() call on the passive side. This delay is larger than the default CMA timeout, and triggers a retry from the active side. The retried REQ will use another pv_cm_id (the cm_id on the wire). This confuses the CM protocol and two REJs are sent from the passive side. 
Here is an excerpt from ibdump running without the patch: 3.285092 LID: 4 -> LID: 4 SDP 290 CM: ConnectRequest(SDP Hello) 7.382711 LID: 4 -> LID: 4 SDP 290 CM: ConnectRequest(SDP Hello) 7.382861 LID: 4 -> LID: 4 InfiniBand 290 CM: ConnectReject 7.387644 LID: 4 -> LID: 4 InfiniBand 290 CM: ConnectReject and here is the same with bug fix applied: 3.251010 LID: 4 -> LID: 4 SDP 290 CM: ConnectRequest(SDP Hello) 7.349387 LID: 4 -> LID: 4 SDP 290 CM: ConnectRequest(SDP Hello) 8.258443 LID: 4 -> LID: 4 SDP 290 CM: ConnectReply(SDP Hello) 8.259890 LID: 4 -> LID: 4 InfiniBand 290 CM: ReadyToUse Suggested-by: Venkat Venkatsubra Signed-off-by: Håkon Bugge Reported-by: Wei Lin Guay Tested-by: Wei Lin Guay Reviewed-by: Yuval Shaia Acked-by: Jack Morgenstein Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 1e6c526450d9..fedaf8260105 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -323,6 +323,9 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id mad->mad_hdr.attr_id == CM_REP_ATTR_ID || mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { sl_cm_id = get_local_comm_id(mad); + id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); + if (id) + goto cont; id = id_map_alloc(ibdev, slave_id, sl_cm_id); if (IS_ERR(id)) { mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", @@ -343,6 +346,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id return -EINVAL; } +cont: set_local_comm_id(mad, id->pv_cm_id); if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) -- cgit From 720336c42e41a917002fcae3aa14e30f5022bbb7 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 21 Jun 2017 19:55:43 +0530 Subject: iw_cxgb4: don't use WR keys/addrs for 0 byte reads Only use the read sge lkey/addr and the remote 
rkey/addr if the length of the read is not zero. Otherwise the read response might be treated as the RTR read response and not delivered to the application. Or worse Terminator hardware will fail a 0B read if the STAG is 0 even if the read length is 0. Signed-off-by: Steve Wise Signed-off-by: Ganesh Goudar Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index bfc77596acbe..cb7fc0d35d1d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -569,7 +569,7 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { if (wr->num_sge > 1) return -EINVAL; - if (wr->num_sge) { + if (wr->num_sge && wr->sg_list[0].length) { wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32)); -- cgit From c75d3ec8c0ee469de79ae83c1a827d753603e49f Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:04 +0300 Subject: RDMA/qedr: Prevent memory overrun in verbs' user responses Wrap ib_copy_to_udata with a function that ensures that the data being copied over to user space isn't longer than the allowed. 
Fixes: cecbcddf6461 ("qedr: Add support for QP verbs") Fixes: a7efd7773e31 ("qedr: Add support for PD,PKEY and CQ verbs") Fixes: ac1b36e55a51 ("qedr: Add support for user context verbs") Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 548e4d1e998f..2ae71b8f1ba8 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -53,6 +53,14 @@ #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) +static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, + size_t len) +{ + size_t min_len = min_t(size_t, len, udata->outlen); + + return ib_copy_to_udata(udata, src, min_len); +} + int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { if (index > QEDR_ROCE_PKEY_TABLE_LEN) @@ -378,7 +386,7 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, uresp.sges_per_srq_wr = dev->attr.max_srq_sge; uresp.max_cqes = QEDR_MAX_CQES; - rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) goto err; @@ -499,7 +507,7 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, uresp.pd_id = pd_id; - rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) { DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id); @@ -729,7 +737,7 @@ static int qedr_copy_cq_uresp(struct qedr_dev *dev, uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); uresp.icid = cq->icid; - rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid); @@ -1238,7 +1246,7 @@ static int 
qedr_copy_qp_uresp(struct qedr_dev *dev, uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; uresp.qp_id = qp->qp_id; - rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) DP_ERR(dev, "create qp: failed a copy to user space with qp icid=0x%x.\n", -- cgit From 1217197142d1681a8b8aaa88cdf4b245b76974cd Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Tue, 27 Jun 2017 12:19:38 +0300 Subject: rxe: fix broken receive queue draining If we modified the qp to ERROR state, and drained the receive queue, post_recv must trigger the responder task to complete the drain work request. Cc: Bart Van Assche Signed-off-by: Vijay Immanuel Signed-off-by: Sagi Grimberg Reviewed-by: Bart Van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 3 +++ drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index be944d5aa9af..a958ee918a49 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1219,6 +1219,9 @@ void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) kfree_skb(skb); } + if (notify) + return; + while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue)) advance_consumer(qp->rq.queue); } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 07511718d98d..af90a7d42b96 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -914,6 +914,9 @@ static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_unlock_irqrestore(&rq->producer_lock, flags); + if (qp->resp.state == QP_STATE_ERROR) + rxe_run_task(&qp->resp.task, 1); + err1: return err; } -- cgit From e6e52aec494900912fedd7b595b8827ba70a670d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Jul 2017 10:21:36 +0300 Subject:
RDMA/iser: don't send an rkey if all data is written as immediate-data We might get some bogus error completions in case the target will remotely invalidate the rkey and the HCA will need to retransmit from this buffer. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iser_initiator.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 12ed62ce9ff7..2a07692007bd 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -137,8 +137,10 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (unsol_sz < edtl) { hdr->flags |= ISER_WSV; - hdr->write_stag = cpu_to_be32(mem_reg->rkey); - hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz); + if (buf_out->data_len > imm_sz) { + hdr->write_stag = cpu_to_be32(mem_reg->rkey); + hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz); + } iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " "VA:%#llX + unsol:%d\n", -- cgit From 5a7a88f1b488e4ee49eb3d5b82612d4d9ffdf2c3 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Fri, 14 Jul 2017 09:41:30 -0500 Subject: RDMA/uverbs: Fix the check for port number The port number is only valid if IB_QP_PORT is set in the mask. So only check port number if it is valid to prevent modify_qp from failing due to an invalid port number.
Fixes: 5ecce4c9b17b("Check port number supplied by user verbs cmds") Cc: # v2.6.14+ Reviewed-by: Steve Wise Signed-off-by: Mustafa Ismail Tested-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 71451eae42de..2c98533a0203 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1933,7 +1933,8 @@ static int modify_qp(struct ib_uverbs_file *file, goto out; } - if (!rdma_is_port_valid(qp->device, cmd->base.port_num)) { + if ((cmd->base.attr_mask & IB_QP_PORT) && + !rdma_is_port_valid(qp->device, cmd->base.port_num)) { ret = -EINVAL; goto release_qp; } -- cgit From a62ab66b13a0f9bcb17b7b761f6670941ed5cd62 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Fri, 14 Jul 2017 09:41:31 -0500 Subject: RDMA/core: Initialize port_num in qp_attr Initialize the port_num for iWARP in rdma_init_qp_attr. 
Fixes: 5ecce4c9b17b("Check port number supplied by user verbs cmds") Cc: # v2.6.14+ Reviewed-by: Steve Wise Signed-off-by: Mustafa Ismail Tested-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 11aff923b633..0eb393237ba2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1033,6 +1033,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); + qp_attr->port_num = id_priv->id.port_num; + *qp_attr_mask |= IB_QP_PORT; } else ret = -ENOSYS; -- cgit From edf3f301db7af7e784d06f7059dfc8a69359af13 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 10 Jul 2017 18:45:41 +0300 Subject: IB/ipoib: Fix race between light events and interface restart A potential race between light_event and interface restart may attach multicast group to an already attached QP. Scenario: light_event flow goes through ipoib_mcast_dev_flush function, if a context switch occurs before calling ipoib_mcast_remove_list, then we may face a situation where the broadcast of the priv is null and the corresponding QP is not detached yet. If an "interface restart" runs during the previous context switch, the following scenario occurs: When the device goes up, ipoib_ib_dev_up function will be called, it will send a new registration request to the broadcast group and then attach the group to the QP that was not detached before. 
IPOIB_FLUSH_LIGHT INTERFACE RESTART __ipoib_ib_dev_flush | | | | | | | ipoib_mcast_dev_flush | Move mcast list and broadcast to remove_list | | | | | Context Switch--> | | ipoib_ib_dev_down | | | | | ipoib_ib_dev_up | | | | | ipoib_mcast_join_task | allocate new broadcast | | | | | Attach QP to multicast group | | | | | <--Context Switch ipoib_mcast_leave Detach QP from multicast group Signed-off-by: Feras Daoud Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 ++ 3 files changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ff50a7bd66d8..7ac25059c40f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -336,6 +336,7 @@ struct ipoib_dev_priv { unsigned long flags; struct rw_semaphore vlan_rwsem; + struct mutex mcast_mutex; struct rb_root path_tree; struct list_head path_list; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4ce315c92b48..144187b407bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1877,6 +1877,7 @@ static void ipoib_build_priv(struct net_device *dev) priv->dev = dev; spin_lock_init(&priv->lock); init_rwsem(&priv->vlan_rwsem); + mutex_init(&priv->mcast_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 057f58e6afca..0a0b2ce45cbc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -838,6 +838,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) struct ipoib_mcast *mcast, *tmcast; unsigned long flags; + mutex_lock(&priv->mcast_mutex); ipoib_dbg_mcast(priv, "flushing 
multicast list\n"); spin_lock_irqsave(&priv->lock, flags); @@ -865,6 +866,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) wait_for_completion(&mcast->done); ipoib_mcast_remove_list(&remove_list); + mutex_unlock(&priv->mcast_mutex); } static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast) -- cgit From 6bdc8de2e86e717124a715ecc480892a2c331ff5 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 12 Jul 2017 10:40:25 +0300 Subject: IB/ipoib: Use cancel_delayed_work_sync when needed The work mcast_task can re-queue itself, so instead of doing cancel && flush_workqueue, that still can leave a queued task on the air, use cancel_delayed_work_sync. Also, no need to use lock over the cancel, the original lock was due to bit assignment setting (IPOIB_MCAST_RUN) that is not in use anymore. Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0a0b2ce45cbc..f80bf0f5d7cf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -684,15 +684,10 @@ void ipoib_mcast_start_thread(struct net_device *dev) int ipoib_mcast_stop_thread(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - unsigned long flags; ipoib_dbg_mcast(priv, "stopping multicast thread\n"); - spin_lock_irqsave(&priv->lock, flags); - cancel_delayed_work(&priv->mcast_task); - spin_unlock_irqrestore(&priv->lock, flags); - - flush_workqueue(priv->wq); + cancel_delayed_work_sync(&priv->mcast_task); return 0; } -- cgit From a08e1120627f72e9ed7c291e3b9f8dd29c1513ab Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 12 Jul 2017 13:11:54 +0300 Subject: IB/ipoib: Make sure no in-flight joins while leaving that mcast While cleaning neighs and there is a 
send-only mcast neigh, the driver should wait to finish its join process before trying to remove it. Without this patch, we will see messages like: "ipoib_mcast_leave on an in-flight join" and unexpected results in the join_complete. Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index f80bf0f5d7cf..93e149efc1f5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -743,6 +743,14 @@ void ipoib_mcast_remove_list(struct list_head *remove_list) { struct ipoib_mcast *mcast, *tmcast; + /* + * make sure the in-flight joins have finished before we attempt + * to leave + */ + list_for_each_entry_safe(mcast, tmcast, remove_list, list) + if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) + wait_for_completion(&mcast->done); + list_for_each_entry_safe(mcast, tmcast, remove_list, list) { ipoib_mcast_leave(mcast->dev, mcast); ipoib_mcast_free(mcast); @@ -852,14 +860,6 @@ void ipoib_mcast_dev_flush(struct net_device *dev) spin_unlock_irqrestore(&priv->lock, flags); - /* - * make sure the in-flight joins have finished before we attempt - * to leave - */ - list_for_each_entry_safe(mcast, tmcast, &remove_list, list) - if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) - wait_for_completion(&mcast->done); - ipoib_mcast_remove_list(&remove_list); mutex_unlock(&priv->mcast_mutex); } @@ -979,14 +979,6 @@ void ipoib_mcast_restart_task(struct work_struct *work) netif_addr_unlock(dev); local_irq_restore(flags); - /* - * make sure the in-flight joins have finished before we attempt - * to leave - */ - list_for_each_entry_safe(mcast, tmcast, &remove_list, list) - if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) - 
wait_for_completion(&mcast->done); - ipoib_mcast_remove_list(&remove_list); /* -- cgit From 11f74b40359b19f760964e71d04882a6caf530cc Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 13 Jul 2017 11:27:12 +0300 Subject: IB/ipoib: Prevent setting negative values to max_nonsrq_conn_qp Don't allow negative values to max_nonsrq_conn_qp. There is no functional impact on a negative value but it is logically incorrect. Fixes: 68e995a29572 ("IPoIB/cm: Add connected mode support for devices without SRQs") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 144187b407bd..8b7ec15a9d6e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2366,6 +2366,7 @@ static int __init ipoib_init_module(void) ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); + ipoib_max_conn_qp = max(ipoib_max_conn_qp, 0); #endif /* -- cgit From d2e46fccc3e3d73a741efe433f00960331280696 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Sun, 16 Jul 2017 11:33:01 +0300 Subject: IB/ipoib: Set IPOIB_NEIGH_TBL_FLUSH after flushed completion initialization Set IPOIB_NEIGH_TBL_FLUSH bit after initializing the neighbor flushed completion, otherwise the garbage collector may signal a completion while it is not initialized yet.
Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Feras Daoud Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8b7ec15a9d6e..f4403c52cd67 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1560,6 +1560,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) int i, wait_flushed = 0; init_completion(&priv->ntbl.flushed); + set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); spin_lock_irqsave(&priv->lock, flags); @@ -1604,7 +1605,6 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); init_completion(&priv->ntbl.deleted); - set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); /* Stop GC if called at init fail need to cancel work */ stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); -- cgit From 4829d964dfb027558c732cfa0d13b716ab3f0838 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 10 Jul 2017 18:12:43 +0300 Subject: IB/ipoib: Add multicast packets statistics Update the multicast counter when multicast packets are received and provide this information through ethtool support. 
Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 3 ++- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 7871379342f4..184a22f48027 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -52,7 +52,8 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = { IPOIB_NETDEV_STAT(tx_bytes), IPOIB_NETDEV_STAT(tx_errors), IPOIB_NETDEV_STAT(rx_dropped), - IPOIB_NETDEV_STAT(tx_dropped) + IPOIB_NETDEV_STAT(tx_dropped), + IPOIB_NETDEV_STAT(multicast), }; #define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 57a9655e844d..02eda1f53a67 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -256,6 +256,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; + if (skb->pkt_type == PACKET_MULTICAST) + dev->stats.multicast++; skb->dev = dev; if ((dev->features & NETIF_F_RXCSUM) && -- cgit From eb54714ddcb2462d4d4b8aa78d028b61e217a835 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Sun, 2 Jul 2017 15:05:59 +0300 Subject: IB/ipoib: Add get statistics support to SRIOV VF Add SRIOV VF support to get traffic statistics. 
Signed-off-by: Feras Daoud Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f4403c52cd67..24fa87fe0952 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1847,6 +1847,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_tx_timeout = ipoib_timeout, .ndo_set_rx_mode = ipoib_set_mcast_list, .ndo_get_iflink = ipoib_get_iflink, + .ndo_get_stats64 = ipoib_get_stats, }; void ipoib_setup_common(struct net_device *dev) -- cgit From dc892e17bbae670a3d7aa6ab8bd1033b15b24645 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 13 Jul 2017 13:34:19 +0300 Subject: IB/ipoib: Clean error paths in add port Refactor error paths in ipoib_add_port() function. The code flow ensures that the function terminates on every error flow and it makes redundant all "else" cases. The functions are called during the flow are returning "result < 0", in case of error, so there is no need to check it explicitly. 
Fixes: 58e9cc90cda7 ("IB/IPoIB: Fix bad error flow in ipoib_add_port()") Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 24fa87fe0952..6c77df34869d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2175,14 +2175,14 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->dev_id = port - 1; result = ib_query_port(hca, port, &attr); - if (!result) - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); - else { + if (result) { printk(KERN_WARNING "%s: ib_query_port %d failed\n", hca->name, port); goto device_init_failed; } + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; @@ -2213,12 +2213,14 @@ static struct net_device *ipoib_add_port(const char *format, printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; - } else - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + } + + memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, + sizeof(union ib_gid)); set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); result = ipoib_dev_init(priv->dev, hca, port); - if (result < 0) { + if (result) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; -- cgit From 1b355094b308f3377c8f574ce86135ee159c6285 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 15 Jul 2017 16:26:55 +0300 Subject: IB/ipoib: Remove double pointer assigning There is no need to assign "p" pointer twice. 
This patch fixes the following smatch warning: drivers/infiniband/ulp/ipoib/ipoib_cm.c:517 ipoib_cm_rx_handler() warn: missing break? reassigning 'p->id' Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support") Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index f87d104837dc..d69410c2ed97 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -511,7 +511,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, case IB_CM_REQ_RECEIVED: return ipoib_cm_req_handler(cm_id, event); case IB_CM_DREQ_RECEIVED: - p = cm_id->context; ib_send_cm_drep(cm_id, NULL, 0); /* Fall through */ case IB_CM_REJ_RECEIVED: -- cgit From b287b76e89503ef1d403cc5cc8bd74b035d25bfa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 23 Jul 2017 10:46:14 +0300 Subject: Revert "IB/core: Allow QP state transition from reset to error" The commit ebc9ca43e1d5 ("IB/core: Allow QP state transition from reset to error") allowed transition from Reset to Error state for the QPs. This behavior doesn't follow the IBTA specification 1.3, which in 10.3.1 QUEUE PAIR AND EE CONTEXT STATES section. The quote from the spec: "An error can be forced from any state, except Reset, with the Modify QP/EE Verb." 
Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index fb98ed67d5bc..7f8fe443df46 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -895,7 +895,6 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, - [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { -- cgit From 5dc78ad1904db597bdb4427f3ead437aae86f54c Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 13 Jul 2017 14:29:08 +0300 Subject: IB/ipoib: Notify on modify QP failure only when relevant Modify QP can fail and it can be acceptable, like when moving from RST to ERR state, all the rest are not acceptable and a message to the log should be printed. The current code prints on all failures and many messages like: "Failed to modify QP to ERROR state" appear, even when supported by the state machine of the QP object. 
Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 02eda1f53a67..2e075377242e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -711,6 +711,27 @@ static int recvs_pending(struct net_device *dev) return pending; } +static void check_qp_movement_and_print(struct ipoib_dev_priv *priv, + struct ib_qp *qp, + enum ib_qp_state new_state) +{ + struct ib_qp_attr qp_attr; + struct ib_qp_init_attr query_init_attr; + int ret; + + ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr); + if (ret) { + ipoib_warn(priv, "%s: Failed to query QP\n", __func__); + return; + } + /* print according to the new-state and the previous state.*/ + if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET) + ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n"); + else + ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n", + new_state, qp_attr.qp_state); +} + int ipoib_ib_dev_stop_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -730,7 +751,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) */ qp_attr.qp_state = IB_QPS_ERR; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) - ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); + check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR); /* Wait for all sends and receives to complete */ begin = jiffies; -- cgit From 5fff41e1f89d93feef9833c49a415dc337af5a99 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 1 Aug 2017 09:41:34 +0300 Subject: IB/core: Fix race condition in resolving IP to MAC Currently while resolving IP address to MAC address single delayed work is used for resolving multiple such resolve requests. 
This single work item essentially performs two tasks. (a) any retry needed to resolve and (b) it executes the callback function for all completed requests While work is executing callbacks, any new work scheduled on this workqueue is lost because workqueue has completed looking at all pending requests and now looking at callbacks, but work is still under execution. Any further retry to look at pending requests in process_req() after executing callbacks would lead to similar race condition (it may reduce the probability further but doesn't eliminate it). Retrying to enqueue the work from queue_req() context is not something the rest of the kernel modules have followed. Therefore fix in this patch utilizes kernel facility to enqueue multiple work items to a workqueue. This ensures that no such request gets lost in synchronization. Request list is still maintained so that rdma_cancel_addr() can unlink the request and get the completion with error sooner. Neighbour update event handling continues to be handled in same way as before. Additionally process_req() work entry cancels any pending work for a request that gets completed while processing those requests. Originally ib_addr was ST workqueue, but it became MT work queue with patch of [1]. This patch again makes it similar to ST so that neighbour update events handler work item doesn't race with other work items. In one such below trace, (though on 4.5 based kernel) it can be seen that process_req() never executed the callback, which is likely for an event that was scheduled by queue_req() when previous callback was getting executed by workqueue. [] schedule+0x3e/0x90 [] schedule_timeout+0x1b5/0x210 [] ? ip_route_output_flow+0x27/0x70 [] ? addr_resolve+0x149/0x1b0 [ib_addr] [] wait_for_completion+0x10f/0x170 [] ? try_to_wake_up+0x210/0x210 [] ? rdma_copy_addr+0xa0/0xa0 [ib_addr] [] rdma_addr_find_l2_eth_by_grh+0x1d0/0x278 [ib_addr] [] ?
sub_alloc+0x77/0x1c0 [] ib_init_ah_from_wc+0x3a7/0x5a0 [ib_core] [] cm_req_handler+0xea/0x580 [ib_cm] [] ? __switch_to+0x212/0x5e0 [] cm_work_handler+0x6d/0x150 [ib_cm] [] process_one_work+0x151/0x4b0 [] worker_thread+0x120/0x480 [] ? __schedule+0x30b/0x890 [] ? process_one_work+0x4b0/0x4b0 [] ? process_one_work+0x4b0/0x4b0 [] kthread+0xce/0xf0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x42/0x70 [] ? kthread_freezable_should_stop+0x70/0x70 INFO: task kworker/u144:1:156520 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u144:1 D ffff883ffe1d7600 0 156520 2 0x00000080 Workqueue: ib_addr process_req [ib_addr] ffff883f446fbbd8 0000000000000046 ffff881f95280000 ffff881ff24de200 ffff883f66120000 ffff883f446f8008 ffff881f95280000 ffff883f6f9208c4 ffff883f6f9208c8 00000000ffffffff ffff883f446fbbf8 ffffffff816b0dde [1] http://lkml.iu.edu/hypermail/linux/kernel/1608.1/05834.html Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 62 ++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 01236cef7bfb..437522ca97b4 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -61,6 +61,7 @@ struct addr_req { void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); unsigned long timeout; + struct delayed_work work; int status; u32 seq; }; @@ -295,7 +296,7 @@ int rdma_translate_ip(const struct sockaddr *addr, } EXPORT_SYMBOL(rdma_translate_ip); -static void set_timeout(unsigned long time) +static void set_timeout(struct delayed_work *delayed_work, unsigned long time) { unsigned long delay; @@ -303,7 +304,7 @@ static void set_timeout(unsigned long time) if ((long)delay < 0) delay = 
0; - mod_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, delayed_work, delay); } static void queue_req(struct addr_req *req) @@ -318,8 +319,7 @@ static void queue_req(struct addr_req *req) list_add(&req->list, &temp_req->list); - if (req_list.next == &req->list) - set_timeout(req->timeout); + set_timeout(&req->work, req->timeout); mutex_unlock(&lock); } @@ -574,6 +574,37 @@ static int addr_resolve(struct sockaddr *src_in, return ret; } +static void process_one_req(struct work_struct *_work) +{ + struct addr_req *req; + struct sockaddr *src_in, *dst_in; + + mutex_lock(&lock); + req = container_of(_work, struct addr_req, work.work); + + if (req->status == -ENODATA) { + src_in = (struct sockaddr *)&req->src_addr; + dst_in = (struct sockaddr *)&req->dst_addr; + req->status = addr_resolve(src_in, dst_in, req->addr, + true, req->seq); + if (req->status && time_after_eq(jiffies, req->timeout)) { + req->status = -ETIMEDOUT; + } else if (req->status == -ENODATA) { + /* requeue the work for retrying again */ + set_timeout(&req->work, req->timeout); + mutex_unlock(&lock); + return; + } + } + list_del(&req->list); + mutex_unlock(&lock); + + req->callback(req->status, (struct sockaddr *)&req->src_addr, + req->addr, req->context); + put_client(req->client); + kfree(req); +} + static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; @@ -591,20 +622,23 @@ static void process_req(struct work_struct *work) true, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; - else if (req->status == -ENODATA) + else if (req->status == -ENODATA) { + set_timeout(&req->work, req->timeout); continue; + } } list_move_tail(&req->list, &done_list); } - if (!list_empty(&req_list)) { - req = list_entry(req_list.next, struct addr_req, list); - set_timeout(req->timeout); - } mutex_unlock(&lock); list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); + /* It is safe to cancel other work items 
from this work item + * because at a time there can be only one work item running + * with this single threaded work queue. + */ + cancel_delayed_work(&req->work); req->callback(req->status, (struct sockaddr *) &req->src_addr, req->addr, req->context); put_client(req->client); @@ -647,6 +681,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->context = context; req->client = client; atomic_inc(&client->refcount); + INIT_DELAYED_WORK(&req->work, process_one_req); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); @@ -701,7 +736,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) req->status = -ECANCELED; req->timeout = jiffies; list_move(&req->list, &req_list); - set_timeout(req->timeout); + set_timeout(&req->work, req->timeout); break; } } @@ -807,9 +842,8 @@ static int netevent_callback(struct notifier_block *self, unsigned long event, if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; - if (neigh->nud_state & NUD_VALID) { - set_timeout(jiffies); - } + if (neigh->nud_state & NUD_VALID) + set_timeout(&work, jiffies); } return 0; } @@ -820,7 +854,7 @@ static struct notifier_block nb = { int addr_init(void) { - addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0); + addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM); if (!addr_wq) return -ENOMEM; -- cgit From f7a6cb7b38c6845b26aaa8bbdf519ff6e3090831 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Aug 2017 09:41:35 +0300 Subject: RDMA/uverbs: Prevent leak of reserved field initialize to zero the response structure to prevent the leakage of "resp.reserved" field. 
drivers/infiniband/core/uverbs_cmd.c:1178 ib_uverbs_resize_cq() warn: check that 'resp.reserved' doesn't leak information Fixes: 33b9b3ee9709 ("IB: Add userspace support for resizing CQs") Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c98533a0203..c551d2b275fd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1153,7 +1153,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_resize_cq cmd; - struct ib_uverbs_resize_cq_resp resp; + struct ib_uverbs_resize_cq_resp resp = {}; struct ib_udata udata; struct ib_cq *cq; int ret = -EINVAL; -- cgit From efdd6f53b10aead0f5cf19a93dd3eb268ac0d991 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 1 Aug 2017 09:41:36 +0300 Subject: IB/uverbs: Fix device cleanup Uverbs device should be cleaned up only when there is no potential usage of. As part of ib_uverbs_remove_one which might be triggered upon reset flow the device reference count is decreased as expected and leave the final cleanup to the FDs that were opened. Current code increases reference count upon opening a new command FD and decreases it upon closing the file. The event FD is opened internally and rely on the command FD by taking on it a reference count. In case that the command FD was closed and just later the event FD we may ensure that the device resources as of srcu are still alive as they are still in use. Fixing the above by moving the reference count decreasing to the place where the command FD is really freed instead of doing that when it was just closed. 
fixes: 036b10635739 ("IB/uverbs: Enable device removal when there are active user space applications") Signed-off-by: Yishai Hadas Reviewed-by: Matan Barak Reviewed-by: Jason Gunthorpe Tested-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 3d2609608f58..c023e2c81b8f 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -250,6 +250,7 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); + kobject_put(&file->device->kobj); kfree(file); } @@ -917,7 +918,6 @@ err: static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - struct ib_uverbs_device *dev = file->device; mutex_lock(&file->cleanup_mutex); if (file->ucontext) { @@ -939,7 +939,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) ib_uverbs_release_async_event_file); kref_put(&file->ref, ib_uverbs_release_file); - kobject_put(&dev->kobj); return 0; } -- cgit From 931b3c1a832621b4bdcbaf783096fc267eb36fbe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Aug 2017 09:41:37 +0300 Subject: RDMA/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes already declared define. 
Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); -- cgit From 5db465f235e74293e285e1fa924a55e52ba52a98 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Aug 2017 11:12:08 +0300 Subject: IB/hns: checking for IS_ERR() instead of NULL The hns_roce_v1_create_lp_qp() returns NULL on error, not error pointers. Fixes: bfcc681bd09d ("IB/hns: Fix the bug when free mr") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 23fad6d96944..2540b65e242c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -733,7 +733,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) continue; free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); - if (IS_ERR(free_mr->mr_free_qp[i])) { + if (!free_mr->mr_free_qp[i]) { dev_err(dev, "Create loop qp failed!\n"); goto create_lp_qp_failed; } -- cgit