summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/infiniband/sysfs.txt3
-rw-r--r--MAINTAINERS16
-rw-r--r--drivers/infiniband/Kconfig3
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/device.c15
-rw-r--r--drivers/infiniband/core/sa_query.c5
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c17
-rw-r--r--drivers/infiniband/core/verbs.c40
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig7
-rw-r--r--drivers/infiniband/hw/i40iw/Makefile9
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h570
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c4141
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h456
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c4743
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h1713
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.c821
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.h241
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c730
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c1910
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_osdep.h215
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_p.h106
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c618
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.h131
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c1436
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.h183
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_register.h1030
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_status.h100
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h1312
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ucontext.h107
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c1204
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h442
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c1270
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c2437
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.h173
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.c85
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.h62
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.c748
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.h124
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c194
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c12
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h8
-rw-r--r--drivers/infiniband/hw/qib/Kconfig2
-rw-r--r--drivers/infiniband/hw/qib/Makefile10
-rw-r--r--drivers/infiniband/hw/qib/qib.h33
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h3
-rw-r--r--drivers/infiniband/hw/qib/qib_cq.c545
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c71
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c25
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c186
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c338
-rw-r--r--drivers/infiniband/hw/qib/qib_mmap.c174
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c490
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c1178
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c409
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c191
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_srq.c380
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c85
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c79
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c142
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c1223
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h812
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs_mcast.c363
-rw-r--r--drivers/infiniband/sw/Makefile1
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig6
-rw-r--r--drivers/infiniband/sw/rdmavt/Makefile13
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c196
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.h59
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c (renamed from drivers/staging/rdma/hfi1/cq.c)325
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h64
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.c184
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.h53
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c171
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.h60
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c (renamed from drivers/staging/rdma/hfi1/verbs_mcast.c)262
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.h58
-rw-r--r--drivers/infiniband/sw/rdmavt/mmap.c (renamed from drivers/staging/rdma/hfi1/mmap.c)142
-rw-r--r--drivers/infiniband/sw/rdmavt/mmap.h63
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c830
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.h92
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.c119
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.h58
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c1696
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h69
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c (renamed from drivers/staging/rdma/hfi1/srq.c)204
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.h62
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.c49
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.h187
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c873
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h104
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c27
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c65
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/Makefile1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h22
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.c1012
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.h232
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c115
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl.h34
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c247
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c72
-rw-r--r--drivers/staging/rdma/hfi1/Kconfig13
-rw-r--r--drivers/staging/rdma/hfi1/Makefile10
-rw-r--r--drivers/staging/rdma/hfi1/affinity.c430
-rw-r--r--drivers/staging/rdma/hfi1/affinity.h91
-rw-r--r--drivers/staging/rdma/hfi1/aspm.h309
-rw-r--r--drivers/staging/rdma/hfi1/chip.c2456
-rw-r--r--drivers/staging/rdma/hfi1/chip.h151
-rw-r--r--drivers/staging/rdma/hfi1/chip_registers.h9
-rw-r--r--drivers/staging/rdma/hfi1/common.h12
-rw-r--r--drivers/staging/rdma/hfi1/debugfs.c332
-rw-r--r--drivers/staging/rdma/hfi1/debugfs.h5
-rw-r--r--drivers/staging/rdma/hfi1/device.c5
-rw-r--r--drivers/staging/rdma/hfi1/device.h5
-rw-r--r--drivers/staging/rdma/hfi1/diag.c103
-rw-r--r--drivers/staging/rdma/hfi1/dma.c17
-rw-r--r--drivers/staging/rdma/hfi1/driver.c344
-rw-r--r--drivers/staging/rdma/hfi1/efivar.c5
-rw-r--r--drivers/staging/rdma/hfi1/efivar.h5
-rw-r--r--drivers/staging/rdma/hfi1/eprom.c117
-rw-r--r--drivers/staging/rdma/hfi1/eprom.h7
-rw-r--r--drivers/staging/rdma/hfi1/file_ops.c552
-rw-r--r--drivers/staging/rdma/hfi1/firmware.c587
-rw-r--r--drivers/staging/rdma/hfi1/hfi.h251
-rw-r--r--drivers/staging/rdma/hfi1/init.c183
-rw-r--r--drivers/staging/rdma/hfi1/intr.c29
-rw-r--r--drivers/staging/rdma/hfi1/iowait.h126
-rw-r--r--drivers/staging/rdma/hfi1/keys.c356
-rw-r--r--drivers/staging/rdma/hfi1/mad.c1005
-rw-r--r--drivers/staging/rdma/hfi1/mad.h14
-rw-r--r--drivers/staging/rdma/hfi1/mmu_rb.c292
-rw-r--r--drivers/staging/rdma/hfi1/mmu_rb.h73
-rw-r--r--drivers/staging/rdma/hfi1/mr.c473
-rw-r--r--drivers/staging/rdma/hfi1/opa_compat.h20
-rw-r--r--drivers/staging/rdma/hfi1/pcie.c192
-rw-r--r--drivers/staging/rdma/hfi1/pio.c365
-rw-r--r--drivers/staging/rdma/hfi1/pio.h118
-rw-r--r--drivers/staging/rdma/hfi1/pio_copy.c67
-rw-r--r--drivers/staging/rdma/hfi1/platform.c893
-rw-r--r--drivers/staging/rdma/hfi1/platform.h (renamed from drivers/staging/rdma/hfi1/platform_config.h)58
-rw-r--r--drivers/staging/rdma/hfi1/qp.c1642
-rw-r--r--drivers/staging/rdma/hfi1/qp.h198
-rw-r--r--drivers/staging/rdma/hfi1/qsfp.c270
-rw-r--r--drivers/staging/rdma/hfi1/qsfp.h57
-rw-r--r--drivers/staging/rdma/hfi1/rc.c765
-rw-r--r--drivers/staging/rdma/hfi1/ruc.c373
-rw-r--r--drivers/staging/rdma/hfi1/sdma.c365
-rw-r--r--drivers/staging/rdma/hfi1/sdma.h116
-rw-r--r--drivers/staging/rdma/hfi1/sdma_txreq.h135
-rw-r--r--drivers/staging/rdma/hfi1/sysfs.c136
-rw-r--r--drivers/staging/rdma/hfi1/trace.c53
-rw-r--r--drivers/staging/rdma/hfi1/trace.h1477
-rw-r--r--drivers/staging/rdma/hfi1/twsi.c205
-rw-r--r--drivers/staging/rdma/hfi1/twsi.h9
-rw-r--r--drivers/staging/rdma/hfi1/uc.c164
-rw-r--r--drivers/staging/rdma/hfi1/ud.c248
-rw-r--r--drivers/staging/rdma/hfi1/user_exp_rcv.c1044
-rw-r--r--drivers/staging/rdma/hfi1/user_exp_rcv.h13
-rw-r--r--drivers/staging/rdma/hfi1/user_pages.c74
-rw-r--r--drivers/staging/rdma/hfi1/user_sdma.c621
-rw-r--r--drivers/staging/rdma/hfi1/user_sdma.h11
-rw-r--r--drivers/staging/rdma/hfi1/verbs.c1663
-rw-r--r--drivers/staging/rdma/hfi1/verbs.h824
-rw-r--r--drivers/staging/rdma/hfi1/verbs_txreq.c149
-rw-r--r--drivers/staging/rdma/hfi1/verbs_txreq.h116
-rw-r--r--include/linux/mlx5/device.h6
-rw-r--r--include/linux/mlx5/driver.h8
-rw-r--r--include/linux/mlx5/mlx5_ifc.h37
-rw-r--r--include/linux/mlx5/vport.h7
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/rdma/ib_verbs.h29
-rw-r--r--include/rdma/opa_port_info.h2
-rw-r--r--include/rdma/rdma_vt.h481
-rw-r--r--include/rdma/rdmavt_cq.h99
-rw-r--r--include/rdma/rdmavt_mr.h139
-rw-r--r--include/rdma/rdmavt_qp.h446
-rw-r--r--include/uapi/linux/if_link.h7
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h14
-rw-r--r--include/uapi/rdma/rdma_netlink.h1
-rw-r--r--net/core/rtnetlink.c36
192 files changed, 49074 insertions, 15213 deletions
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
index 9028b025501a..3ecf0c3a133f 100644
--- a/Documentation/infiniband/sysfs.txt
+++ b/Documentation/infiniband/sysfs.txt
@@ -78,9 +78,10 @@ HFI1
chip_reset - diagnostic (root only)
boardversion - board version
ports/1/
- CMgtA/
+ CCMgtA/
cc_settings_bin - CCA tables used by PSM2
cc_table_bin
+ cc_prescan - enable prescaning for faster BECN response
sc2v/ - 32 files (0 - 31) used to translate sl->vl
sl2sc/ - 32 files (0 - 31) used to translate sl->sc
vl2mtu/ - 16 (0 - 15) files used to determine MTU for vl
diff --git a/MAINTAINERS b/MAINTAINERS
index 0f3063cce44c..32bafda47c2f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5770,6 +5770,16 @@ F: Documentation/networking/i40evf.txt
F: drivers/net/ethernet/intel/
F: drivers/net/ethernet/intel/*/
+INTEL RDMA RNIC DRIVER
+M: Faisal Latif <faisal.latif@intel.com>
+R: Chien Tin Tung <chien.tin.tung@intel.com>
+R: Mustafa Ismail <mustafa.ismail@intel.com>
+R: Shiraz Saleem <shiraz.saleem@intel.com>
+R: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
+L: linux-rdma@vger.kernel.org
+S: Supported
+F: drivers/infiniband/hw/i40iw/
+
INTEL-MID GPIO DRIVER
M: David Cohen <david.a.cohen@linux.intel.com>
L: linux-gpio@vger.kernel.org
@@ -9224,6 +9234,12 @@ S: Supported
F: net/rds/
F: Documentation/networking/rds.txt
+RDMAVT - RDMA verbs software
+M: Dennis Dalessandro <dennis.dalessandro@intel.com>
+L: linux-rdma@vger.kernel.org
+S: Supported
+F: drivers/infiniband/sw/rdmavt
+
READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
M: Josh Triplett <josh@joshtriplett.org>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 8a8440c0eed1..6425c0e5d18a 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -68,6 +68,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
+source "drivers/infiniband/hw/i40iw/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
@@ -82,4 +83,6 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/sw/rdmavt/Kconfig"
+
endif # INFINIBAND
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index dc21836b5a8d..fad0b44c356f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND) += hw/
obj-$(CONFIG_INFINIBAND) += ulp/
+obj-$(CONFIG_INFINIBAND) += sw/
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 270c7ff6cba7..10979844026a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -650,10 +650,23 @@ int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
+ union ib_gid gid;
+ int err;
+
if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
- return device->query_port(device, port_num, port_attr);
+ memset(port_attr, 0, sizeof(*port_attr));
+ err = device->query_port(device, port_num, port_attr);
+ if (err || port_attr->subnet_prefix)
+ return err;
+
+ err = ib_query_gid(device, port_num, 0, &gid, NULL);
+ if (err)
+ return err;
+
+ port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+ return 0;
}
EXPORT_SYMBOL(ib_query_port);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index b5656a2298ee..8a09c0fb268d 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -885,6 +885,11 @@ static void update_sm_ah(struct work_struct *work)
ah_attr.dlid = port_attr.sm_lid;
ah_attr.sl = port_attr.sm_sl;
ah_attr.port_num = port->port_num;
+ if (port_attr.grh_required) {
+ ah_attr.ah_flags = IB_AH_GRH;
+ ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
+ ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+ }
new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
if (IS_ERR(new_ah->ah)) {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3638c787cb7c..6fdc7ecdaca0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -402,7 +402,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = attr->device_cap_flags;
+ resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->max_sge = attr->max_sge;
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
@@ -3600,9 +3600,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
- struct ib_uverbs_ex_query_device_resp resp;
+ struct ib_uverbs_ex_query_device_resp resp = { {0} };
struct ib_uverbs_ex_query_device cmd;
- struct ib_device_attr attr;
+ struct ib_device_attr attr = {0};
int err;
if (ucore->inlen < sizeof(cmd))
@@ -3623,14 +3623,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
if (ucore->outlen < resp.response_length)
return -ENOSPC;
- memset(&attr, 0, sizeof(attr));
-
err = ib_dev->query_device(ib_dev, &attr, uhw);
if (err)
return err;
copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
- resp.comp_mask = 0;
if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
goto end;
@@ -3643,9 +3640,6 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
attr.odp_caps.per_transport_caps.uc_odp_caps;
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
- resp.odp_caps.reserved = 0;
-#else
- memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
#endif
resp.response_length += sizeof(resp.odp_caps);
@@ -3663,8 +3657,5 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- if (err)
- return err;
-
- return 0;
+ return err;
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5cd1e3987f2b..15b8adbf39c0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1551,6 +1551,46 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
}
EXPORT_SYMBOL(ib_check_mr_status);
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+ int state)
+{
+ if (!device->set_vf_link_state)
+ return -ENOSYS;
+
+ return device->set_vf_link_state(device, vf, port, state);
+}
+EXPORT_SYMBOL(ib_set_vf_link_state);
+
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *info)
+{
+ if (!device->get_vf_config)
+ return -ENOSYS;
+
+ return device->get_vf_config(device, vf, port, info);
+}
+EXPORT_SYMBOL(ib_get_vf_config);
+
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_stats *stats)
+{
+ if (!device->get_vf_stats)
+ return -ENOSYS;
+
+ return device->get_vf_stats(device, vf, port, stats);
+}
+EXPORT_SYMBOL(ib_get_vf_stats);
+
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+ int type)
+{
+ if (!device->set_vf_guid)
+ return -ENOSYS;
+
+ return device->set_vf_guid(device, vf, port, guid, type);
+}
+EXPORT_SYMBOL(ib_set_vf_guid);
+
/**
* ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
* and set it the memory region.
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index aded2a5cc2d5..c7ad0a4c8b15 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
obj-$(CONFIG_INFINIBAND_QIB) += qib/
obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
+obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_NES) += nes/
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
new file mode 100644
index 000000000000..6e7d27a14061
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -0,0 +1,7 @@
+config INFINIBAND_I40IW
+ tristate "Intel(R) Ethernet X722 iWARP Driver"
+ depends on INET && I40E
+ select GENERIC_ALLOCATOR
+ ---help---
+ Intel(R) Ethernet X722 iWARP Driver
+ INET && I40IW && INFINIBAND && I40E
diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
new file mode 100644
index 000000000000..90068c03d217
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/Makefile
@@ -0,0 +1,9 @@
+ccflags-y := -Idrivers/net/ethernet/intel/i40e
+
+obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
+
+i40iw-objs :=\
+ i40iw_cm.o i40iw_ctrl.o \
+ i40iw_hmc.o i40iw_hw.o i40iw_main.o \
+ i40iw_pble.o i40iw_puda.o i40iw_uk.o i40iw_utils.o \
+ i40iw_verbs.o i40iw_virtchnl.o i40iw_vf.o
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
new file mode 100644
index 000000000000..819767681445
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -0,0 +1,570 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_IW_H
+#define I40IW_IW_H
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/crc32c.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+#include <crypto/hash.h>
+
+#include "i40iw_status.h"
+#include "i40iw_osdep.h"
+#include "i40iw_d.h"
+#include "i40iw_hmc.h"
+
+#include <i40e_client.h>
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_ucontext.h"
+#include "i40iw_pble.h"
+#include "i40iw_verbs.h"
+#include "i40iw_cm.h"
+#include "i40iw_user.h"
+#include "i40iw_puda.h"
+
+#define I40IW_FW_VERSION 2
+#define I40IW_HW_VERSION 2
+
+#define I40IW_ARP_ADD 1
+#define I40IW_ARP_DELETE 2
+#define I40IW_ARP_RESOLVE 3
+
+#define I40IW_MACIP_ADD 1
+#define I40IW_MACIP_DELETE 2
+
+#define IW_CCQ_SIZE (I40IW_CQP_SW_SQSIZE_2048 + 1)
+#define IW_CEQ_SIZE 2048
+#define IW_AEQ_SIZE 2048
+
+#define RX_BUF_SIZE (1536 + 8)
+#define IW_REG0_SIZE (4 * 1024)
+#define IW_TX_TIMEOUT (6 * HZ)
+#define IW_FIRST_QPN 1
+#define IW_SW_CONTEXT_ALIGN 1024
+
+#define MAX_DPC_ITERATIONS 128
+
+#define I40IW_EVENT_TIMEOUT 100000
+#define I40IW_VCHNL_EVENT_TIMEOUT 100000
+
+#define I40IW_NO_VLAN 0xffff
+#define I40IW_NO_QSET 0xffff
+
+/* access to mcast filter list */
+#define IW_ADD_MCAST false
+#define IW_DEL_MCAST true
+
+#define I40IW_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
+#define I40IW_DRV_OPT_DISABLE_MPA_CRC 0x00000002
+#define I40IW_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
+#define I40IW_DRV_OPT_DISABLE_INTF 0x00000008
+#define I40IW_DRV_OPT_ENABLE_MSI 0x00000010
+#define I40IW_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
+#define I40IW_DRV_OPT_NO_INLINE_DATA 0x00000080
+#define I40IW_DRV_OPT_DISABLE_INT_MOD 0x00000100
+#define I40IW_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
+#define I40IW_DRV_OPT_ENABLE_PAU 0x00000400
+#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
+
+#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
+#define IW_CFG_FPM_QP_COUNT 32768
+
+#define I40IW_MTU_TO_MSS 40
+#define I40IW_DEFAULT_MSS 1460
+
+struct i40iw_cqp_compl_info {
+ u32 op_ret_val;
+ u16 maj_err_code;
+ u16 min_err_code;
+ bool error;
+ u8 op_code;
+};
+
+#define i40iw_pr_err(fmt, args ...) pr_err("%s: "fmt, __func__, ## args)
+
+#define i40iw_pr_info(fmt, args ...) pr_info("%s: " fmt, __func__, ## args)
+
+#define i40iw_pr_warn(fmt, args ...) pr_warn("%s: " fmt, __func__, ## args)
+
+struct i40iw_cqp_request {
+ struct cqp_commands_info info;
+ wait_queue_head_t waitq;
+ struct list_head list;
+ atomic_t refcount;
+ void (*callback_fcn)(struct i40iw_cqp_request*, u32);
+ void *param;
+ struct i40iw_cqp_compl_info compl_info;
+ bool waiting;
+ bool request_done;
+ bool dynamic;
+};
+
+struct i40iw_cqp {
+ struct i40iw_sc_cqp sc_cqp;
+ spinlock_t req_lock; /*cqp request list */
+ wait_queue_head_t waitq;
+ struct i40iw_dma_mem sq;
+ struct i40iw_dma_mem host_ctx;
+ u64 *scratch_array;
+ struct i40iw_cqp_request *cqp_requests;
+ struct list_head cqp_avail_reqs;
+ struct list_head cqp_pending_reqs;
+};
+
+struct i40iw_device;
+
+struct i40iw_ccq {
+ struct i40iw_sc_cq sc_cq;
+ spinlock_t lock; /* ccq control */
+ wait_queue_head_t waitq;
+ struct i40iw_dma_mem mem_cq;
+ struct i40iw_dma_mem shadow_area;
+};
+
+struct i40iw_ceq {
+ struct i40iw_sc_ceq sc_ceq;
+ struct i40iw_dma_mem mem;
+ u32 irq;
+ u32 msix_idx;
+ struct i40iw_device *iwdev;
+ struct tasklet_struct dpc_tasklet;
+};
+
+struct i40iw_aeq {
+ struct i40iw_sc_aeq sc_aeq;
+ struct i40iw_dma_mem mem;
+};
+
+struct i40iw_arp_entry {
+ u32 ip_addr[4];
+ u8 mac_addr[ETH_ALEN];
+};
+
+enum init_completion_state {
+ INVALID_STATE = 0,
+ INITIAL_STATE,
+ CQP_CREATED,
+ HMC_OBJS_CREATED,
+ PBLE_CHUNK_MEM,
+ CCQ_CREATED,
+ AEQ_CREATED,
+ CEQ_CREATED,
+ ILQ_CREATED,
+ IEQ_CREATED,
+ INET_NOTIFIER,
+ IP_ADDR_REGISTERED,
+ RDMA_DEV_REGISTERED
+};
+
+struct i40iw_msix_vector {
+ u32 idx;
+ u32 irq;
+ u32 cpu_affinity;
+ u32 ceq_id;
+};
+
+#define I40IW_MSIX_TABLE_SIZE 65
+
+struct virtchnl_work {
+ struct work_struct work;
+ union {
+ struct i40iw_cqp_request *cqp_request;
+ struct i40iw_virtchnl_work_info work_info;
+ };
+};
+
+struct i40e_qvlist_info;
+
+struct i40iw_device {
+ struct i40iw_ib_device *iwibdev;
+ struct net_device *netdev;
+ wait_queue_head_t vchnl_waitq;
+ struct i40iw_sc_dev sc_dev;
+ struct i40iw_handler *hdl;
+ struct i40e_info *ldev;
+ struct i40e_client *client;
+ struct i40iw_hw hw;
+ struct i40iw_cm_core cm_core;
+ unsigned long *mem_resources;
+ unsigned long *allocated_qps;
+ unsigned long *allocated_cqs;
+ unsigned long *allocated_mrs;
+ unsigned long *allocated_pds;
+ unsigned long *allocated_arps;
+ struct i40iw_qp **qp_table;
+ bool msix_shared;
+ u32 msix_count;
+ struct i40iw_msix_vector *iw_msixtbl;
+ struct i40e_qvlist_info *iw_qvlist;
+
+ struct i40iw_hmc_pble_rsrc *pble_rsrc;
+ struct i40iw_arp_entry *arp_table;
+ struct i40iw_cqp cqp;
+ struct i40iw_ccq ccq;
+ u32 ceqs_count;
+ struct i40iw_ceq *ceqlist;
+ struct i40iw_aeq aeq;
+ u32 arp_table_size;
+ u32 next_arp_index;
+ spinlock_t resource_lock; /* hw resource access */
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 of_device_registered;
+
+ u32 device_cap_flags;
+ unsigned long db_start;
+ u8 resource_profile;
+ u8 max_rdma_vfs;
+ u8 max_enabled_vfs;
+ u8 max_sge;
+ u8 iw_status;
+ u8 send_term_ok;
+ bool push_mode; /* Initialized from parameter passed to driver */
+
+ /* x710 specific */
+ struct mutex pbl_mutex;
+ struct tasklet_struct dpc_tasklet;
+ struct workqueue_struct *virtchnl_wq;
+ struct virtchnl_work virtchnl_w[I40IW_MAX_PE_ENABLED_VF_COUNT];
+ struct i40iw_dma_mem obj_mem;
+ struct i40iw_dma_mem obj_next;
+ u8 *hmc_info_mem;
+ u32 sd_type;
+ struct workqueue_struct *param_wq;
+ atomic_t params_busy;
+ u32 mss;
+ enum init_completion_state init_state;
+ u16 mac_ip_table_idx;
+ atomic_t vchnl_msgs;
+ u32 max_mr;
+ u32 max_qp;
+ u32 max_cq;
+ u32 max_pd;
+ u32 next_qp;
+ u32 next_cq;
+ u32 next_pd;
+ u32 max_mr_size;
+ u32 max_qp_wr;
+ u32 max_cqe;
+ u32 mr_stagmask;
+ u32 mpa_version;
+ bool dcb;
+};
+
+struct i40iw_ib_device {
+ struct ib_device ibdev;
+ struct i40iw_device *iwdev;
+};
+
+struct i40iw_handler {
+ struct list_head list;
+ struct i40e_client *client;
+ struct i40iw_device device;
+ struct i40e_info ldev;
+};
+
+/**
+ * to_iwdev - get device
+ * @ibdev: ib device
+ **/
+static inline struct i40iw_device *to_iwdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct i40iw_ib_device, ibdev)->iwdev;
+}
+
+/**
+ * to_ucontext - get user context
+ * @ibucontext: ib user context
+ **/
+static inline struct i40iw_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct i40iw_ucontext, ibucontext);
+}
+
+/**
+ * to_iwpd - get protection domain
+ * @ibpd: ib pd
+ **/
+static inline struct i40iw_pd *to_iwpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct i40iw_pd, ibpd);
+}
+
+/**
+ * to_iwmr - get device memory region
+ * @ibdev: ib memory region
+ **/
+static inline struct i40iw_mr *to_iwmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct i40iw_mr, ibmr);
+}
+
+/**
+ * to_iwmr_from_ibfmr - get device memory region
+ * @ibfmr: ib fmr
+ **/
+static inline struct i40iw_mr *to_iwmr_from_ibfmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct i40iw_mr, ibfmr);
+}
+
+/**
+ * to_iwmw - get device memory window
+ * @ibmw: ib memory window
+ **/
+static inline struct i40iw_mr *to_iwmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct i40iw_mr, ibmw);
+}
+
+/**
+ * to_iwcq - get completion queue
+ * @ibcq: ib cqdevice
+ **/
+static inline struct i40iw_cq *to_iwcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct i40iw_cq, ibcq);
+}
+
+/**
+ * to_iwqp - get device qp
+ * @ibqp: ib qp
+ **/
+static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct i40iw_qp, ibqp);
+}
+
+/* i40iw.c */
+void i40iw_add_ref(struct ib_qp *);
+void i40iw_rem_ref(struct ib_qp *);
+struct ib_qp *i40iw_get_qp(struct ib_device *, int);
+
+void i40iw_flush_wqes(struct i40iw_device *iwdev,
+ struct i40iw_qp *qp);
+
+void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
+ unsigned char *mac_addr,
+ __be32 *ip_addr,
+ bool ipv4,
+ u32 action);
+
+int i40iw_manage_apbvt(struct i40iw_device *iwdev,
+ u16 accel_local_port,
+ bool add_port);
+
+struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait);
+void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
+void i40iw_put_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
+
+/**
+ * i40iw_alloc_resource - allocate a resource
+ * @iwdev: device pointer
+ * @resource_array: resource bit array:
+ * @max_resources: maximum resource number
+ * @req_resources_num: Allocated resource number
+ * @next: next free id
+ **/
+static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
+ unsigned long *resource_array,
+ u32 max_resources,
+ u32 *req_resource_num,
+ u32 *next)
+{
+ u32 resource_num;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->resource_lock, flags);
+ resource_num = find_next_zero_bit(resource_array, max_resources, *next);
+ if (resource_num >= max_resources) {
+ resource_num = find_first_zero_bit(resource_array, max_resources);
+ if (resource_num >= max_resources) {
+ spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+ return -EOVERFLOW;
+ }
+ }
+ set_bit(resource_num, resource_array);
+ *next = resource_num + 1;
+ if (*next == max_resources)
+ *next = 0;
+ spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+ *req_resource_num = resource_num;
+
+ return 0;
+}
+
+/**
+ * i40iw_is_resource_allocated - detrmine if resource is
+ * allocated
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to check
+ **/
+static inline bool i40iw_is_resource_allocated(struct i40iw_device *iwdev,
+ unsigned long *resource_array,
+ u32 resource_num)
+{
+ bool bit_is_set;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->resource_lock, flags);
+
+ bit_is_set = test_bit(resource_num, resource_array);
+ spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+
+ return bit_is_set;
+}
+
+/**
+ * i40iw_free_resource - free a resource
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to free
+ **/
+static inline void i40iw_free_resource(struct i40iw_device *iwdev,
+ unsigned long *resource_array,
+ u32 resource_num)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->resource_lock, flags);
+ clear_bit(resource_num, resource_array);
+ spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+}
+
+/**
+ * to_iwhdl - Get the handler from the device pointer
+ * @iwdev: device pointer
+ **/
+static inline struct i40iw_handler *to_iwhdl(struct i40iw_device *iw_dev)
+{
+ return container_of(iw_dev, struct i40iw_handler, device);
+}
+
+struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev);
+
+/**
+ * iw_init_resources -
+ */
+u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev);
+
+int i40iw_register_rdma_device(struct i40iw_device *iwdev);
+void i40iw_port_ibevent(struct i40iw_device *iwdev);
+int i40iw_cm_disconn(struct i40iw_qp *);
+void i40iw_cm_disconn_worker(void *);
+int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
+ struct sk_buff *);
+
+enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
+ struct i40iw_cqp_request *cqp_request);
+enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
+ u8 *mac_addr, u8 *mac_index);
+int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+
+void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
+void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
+void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
+ struct i40iw_modify_qp_info *info, bool wait);
+
+enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cminfo,
+ enum i40iw_quad_entry_type etype,
+ enum i40iw_quad_hash_manage_type mtype,
+ void *cmnode,
+ bool wait);
+void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf);
+void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp);
+void i40iw_free_qp_resources(struct i40iw_device *iwdev,
+ struct i40iw_qp *iwqp,
+ u32 qp_num);
+enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
+ struct i40iw_dma_mem *memptr,
+ u32 size, u32 mask);
+
+void i40iw_request_reset(struct i40iw_device *iwdev);
+void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev);
+void i40iw_setup_cm_core(struct i40iw_device *iwdev);
+void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core);
+void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq);
+void i40iw_process_aeq(struct i40iw_device *);
+void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+ u8 state, u8 del_hash,
+ u8 term, u8 term_len);
+int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack);
+struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
+ u16 rem_port,
+ u32 *rem_addr,
+ u16 loc_port,
+ u32 *loc_addr,
+ bool add_refcnt);
+
+enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
+ struct i40iw_sc_qp *qp,
+ struct i40iw_qp_flush_info *info,
+ bool wait);
+
+void i40iw_copy_ip_ntohl(u32 *dst, u32 *src);
+struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
+ u64 addr,
+ u64 size,
+ int acc,
+ u64 *iova_start);
+
+int i40iw_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr);
+int i40iw_inet6addr_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr);
+int i40iw_net_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr);
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
new file mode 100644
index 000000000000..92745d755272
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -0,0 +1,4141 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/atomic.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/notifier.h>
+#include <linux/net.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <linux/highmem.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/ip_fib.h>
+#include <net/tcp.h>
+#include <asm/checksum.h>
+
+#include "i40iw.h"
+
+static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *);
+static void i40iw_cm_post_event(struct i40iw_cm_event *event);
+static void i40iw_disconnect_worker(struct work_struct *work);
+
+/**
+ * i40iw_free_sqbuf - put back puda buffer if refcount = 0
+ * @dev: FPK device
+ * @buf: puda buffer to free
+ */
+void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp)
+{
+ struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
+ struct i40iw_puda_rsrc *ilq = dev->ilq;
+
+ if (!atomic_dec_return(&buf->refcount))
+ i40iw_puda_ret_bufpool(ilq, buf);
+}
+
+/**
+ * i40iw_derive_hw_ird_setting - Calculate IRD
+ *
+ * @cm_ird: IRD of connection's node
+ *
+ * The ird from the connection is rounded to a supported HW
+ * setting (2,8,32,64) and then encoded for ird_size field of
+ * qp_ctx
+ */
+static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
+{
+ u8 encoded_ird_size;
+ u8 pof2_cm_ird = 1;
+
+ /* round-off to next powerof2 */
+ while (pof2_cm_ird < cm_ird)
+ pof2_cm_ird *= 2;
+
+ /* ird_size field is encoded in qp_ctx */
+ switch (pof2_cm_ird) {
+ case I40IW_HW_IRD_SETTING_64:
+ encoded_ird_size = 3;
+ break;
+ case I40IW_HW_IRD_SETTING_32:
+ case I40IW_HW_IRD_SETTING_16:
+ encoded_ird_size = 2;
+ break;
+ case I40IW_HW_IRD_SETTING_8:
+ case I40IW_HW_IRD_SETTING_4:
+ encoded_ird_size = 1;
+ break;
+ case I40IW_HW_IRD_SETTING_2:
+ default:
+ encoded_ird_size = 0;
+ break;
+ }
+ return encoded_ird_size;
+}
+
+/**
+ * i40iw_record_ird_ord - Record IRD/ORD passed in
+ * @cm_node: connection's node
+ * @conn_ird: connection IRD
+ * @conn_ord: connection ORD
+ */
+static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord)
+{
+ if (conn_ird > I40IW_MAX_IRD_SIZE)
+ conn_ird = I40IW_MAX_IRD_SIZE;
+
+ if (conn_ord > I40IW_MAX_ORD_SIZE)
+ conn_ord = I40IW_MAX_ORD_SIZE;
+
+ cm_node->ird_size = conn_ird;
+ cm_node->ord_size = conn_ord;
+}
+
+/**
+ * i40iw_copy_ip_ntohl - change network to host ip
+ * @dst: host ip
+ * @src: big endian
+ */
+void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src)
+{
+ *dst++ = ntohl(*src++);
+ *dst++ = ntohl(*src++);
+ *dst++ = ntohl(*src++);
+ *dst = ntohl(*src);
+}
+
+/**
+ * i40iw_copy_ip_htonl - change host addr to network ip
+ * @dst: host ip
+ * @src: little endian
+ */
+static inline void i40iw_copy_ip_htonl(__be32 *dst, u32 *src)
+{
+ *dst++ = htonl(*src++);
+ *dst++ = htonl(*src++);
+ *dst++ = htonl(*src++);
+ *dst = htonl(*src);
+}
+
+/**
+ * i40iw_fill_sockaddr4 - get addr info for passive connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_fill_sockaddr4(struct i40iw_cm_node *cm_node,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
+
+ laddr->sin_family = AF_INET;
+ raddr->sin_family = AF_INET;
+
+ laddr->sin_port = htons(cm_node->loc_port);
+ raddr->sin_port = htons(cm_node->rem_port);
+
+ laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
+ raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
+}
+
+/**
+ * i40iw_fill_sockaddr6 - get ipv6 addr info for passive side
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_fill_sockaddr6(struct i40iw_cm_node *cm_node,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
+
+ laddr6->sin6_family = AF_INET6;
+ raddr6->sin6_family = AF_INET6;
+
+ laddr6->sin6_port = htons(cm_node->loc_port);
+ raddr6->sin6_port = htons(cm_node->rem_port);
+
+ i40iw_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ i40iw_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+}
+
+/**
+ * i40iw_get_addr_info
+ * @cm_node: contains ip/tcp info
+ * @cm_info: to get a copy of the cm_node ip/tcp info
+*/
+static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node,
+ struct i40iw_cm_info *cm_info)
+{
+ cm_info->ipv4 = cm_node->ipv4;
+ cm_info->vlan_id = cm_node->vlan_id;
+ memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
+ memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
+ cm_info->loc_port = cm_node->loc_port;
+ cm_info->rem_port = cm_node->rem_port;
+}
+
+/**
+ * i40iw_get_cmevent_info - for cm event upcall
+ * @cm_node: connection's node
+ * @cm_id: upper layers cm struct for the event
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_get_cmevent_info(struct i40iw_cm_node *cm_node,
+ struct iw_cm_id *cm_id,
+ struct iw_cm_event *event)
+{
+ memcpy(&event->local_addr, &cm_id->m_local_addr,
+ sizeof(event->local_addr));
+ memcpy(&event->remote_addr, &cm_id->m_remote_addr,
+ sizeof(event->remote_addr));
+ if (cm_node) {
+ event->private_data = (void *)cm_node->pdata_buf;
+ event->private_data_len = (u8)cm_node->pdata.size;
+ event->ird = cm_node->ird_size;
+ event->ord = cm_node->ord_size;
+ }
+}
+
+/**
+ * i40iw_send_cm_event - upcall cm's event handler
+ * @cm_node: connection's node
+ * @cm_id: upper layer's cm info struct
+ * @type: Event type to indicate
+ * @status: status for the event type
+ */
+static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node,
+ struct iw_cm_id *cm_id,
+ enum iw_cm_event_type type,
+ int status)
+{
+ struct iw_cm_event event;
+
+ memset(&event, 0, sizeof(event));
+ event.event = type;
+ event.status = status;
+ switch (type) {
+ case IW_CM_EVENT_CONNECT_REQUEST:
+ if (cm_node->ipv4)
+ i40iw_fill_sockaddr4(cm_node, &event);
+ else
+ i40iw_fill_sockaddr6(cm_node, &event);
+ event.provider_data = (void *)cm_node;
+ event.private_data = (void *)cm_node->pdata_buf;
+ event.private_data_len = (u8)cm_node->pdata.size;
+ break;
+ case IW_CM_EVENT_CONNECT_REPLY:
+ i40iw_get_cmevent_info(cm_node, cm_id, &event);
+ break;
+ case IW_CM_EVENT_ESTABLISHED:
+ event.ird = cm_node->ird_size;
+ event.ord = cm_node->ord_size;
+ break;
+ case IW_CM_EVENT_DISCONNECT:
+ break;
+ case IW_CM_EVENT_CLOSE:
+ break;
+ default:
+ i40iw_pr_err("event type received type = %d\n", type);
+ return -1;
+ }
+ return cm_id->event_handler(cm_id, &event);
+}
+
+/**
+ * i40iw_create_event - create cm event
+ * @cm_node: connection's node
+ * @type: Event type to generate
+ */
+static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node,
+ enum i40iw_cm_event_type type)
+{
+ struct i40iw_cm_event *event;
+
+ if (!cm_node->cm_id)
+ return NULL;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+ if (!event)
+ return NULL;
+
+ event->type = type;
+ event->cm_node = cm_node;
+ memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr));
+ memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr));
+ event->cm_info.rem_port = cm_node->rem_port;
+ event->cm_info.loc_port = cm_node->loc_port;
+ event->cm_info.cm_id = cm_node->cm_id;
+
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
+ cm_node,
+ event,
+ type,
+ event->cm_info.loc_addr,
+ event->cm_info.rem_addr);
+
+ i40iw_cm_post_event(event);
+ return event;
+}
+
+/**
+ * i40iw_free_retrans_entry - free send entry
+ * @cm_node: connection's node
+ */
+static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_timer_entry *send_entry;
+
+ send_entry = cm_node->send_entry;
+ if (send_entry) {
+ cm_node->send_entry = NULL;
+ i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf);
+ kfree(send_entry);
+ atomic_dec(&cm_node->ref_count);
+ }
+}
+
+/**
+ * i40iw_cleanup_retrans_entry - free send entry with lock
+ * @cm_node: connection's node
+ */
+static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ i40iw_free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+}
+
+static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node)
+{
+ if ((cm_node->rem_mac[0] == 0x0) &&
+ (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) ||
+ ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43)))))
+ return true;
+ return false;
+}
+
+/**
+ * i40iw_form_cm_frame - get a free packet and build frame
+ * @cm_node: connection's node ionfo to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer mpa header
+ * @pdata: pointer to private data
+ * @flags: indicates FIN or ACK
+ */
+static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
+ struct i40iw_kmem_info *options,
+ struct i40iw_kmem_info *hdr,
+ struct i40iw_kmem_info *pdata,
+ u8 flags)
+{
+ struct i40iw_puda_buf *sqbuf;
+ struct i40iw_sc_dev *dev = cm_node->dev;
+ u8 *buf;
+
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct ethhdr *ethh;
+ u16 packetsize;
+ u16 eth_hlen = ETH_HLEN;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+
+ sqbuf = i40iw_puda_get_bufpool(dev->ilq);
+ if (!sqbuf)
+ return NULL;
+ buf = sqbuf->mem.va;
+
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata) {
+ pd_len = pdata->size;
+ if (!is_remote_ne020_or_chelsio(cm_node))
+ pd_len += MPA_ZERO_PAD_LEN;
+ }
+
+ if (cm_node->vlan_id < VLAN_TAG_PRESENT)
+ eth_hlen += 4;
+
+ if (cm_node->ipv4)
+ packetsize = sizeof(*iph) + sizeof(*tcph);
+ else
+ packetsize = sizeof(*ip6h) + sizeof(*tcph);
+ packetsize += opts_len + hdr_len + pd_len;
+
+ memset(buf, 0x00, eth_hlen + packetsize);
+
+ sqbuf->totallen = packetsize + eth_hlen;
+ sqbuf->maclen = eth_hlen;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = (void *)cm_node;
+
+ ethh = (struct ethhdr *)buf;
+ buf += eth_hlen;
+
+ if (cm_node->ipv4) {
+ sqbuf->ipv4 = true;
+
+ iph = (struct iphdr *)buf;
+ buf += sizeof(*iph);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
+ } else {
+ ethh->h_proto = htons(ETH_P_IP);
+ }
+
+ iph->version = IPVERSION;
+ iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
+ iph->tos = 0;
+ iph->tot_len = htons(packetsize);
+ iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+ iph->frag_off = htons(0x4000);
+ iph->ttl = 0x40;
+ iph->protocol = IPPROTO_TCP;
+ iph->saddr = htonl(cm_node->loc_addr[0]);
+ iph->daddr = htonl(cm_node->rem_addr[0]);
+ } else {
+ sqbuf->ipv4 = false;
+ ip6h = (struct ipv6hdr *)buf;
+ buf += sizeof(*ip6h);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
+ } else {
+ ethh->h_proto = htons(ETH_P_IPV6);
+ }
+ ip6h->version = 6;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
+ ip6h->nexthdr = 6;
+ ip6h->hop_limit = 128;
+ i40iw_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ i40iw_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+ }
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pd_len)
+ memcpy(buf, pdata->addr, pd_len);
+
+ atomic_set(&sqbuf->refcount, 1);
+
+ return sqbuf;
+}
+
+/**
+ * i40iw_send_reset - Send RST packet
+ * @cm_node: connection's node
+ */
+static int i40iw_send_reset(struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_puda_buf *sqbuf;
+ int flags = SET_RST | SET_ACK;
+
+ sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, flags);
+ if (!sqbuf) {
+ i40iw_pr_err("no sqbuf\n");
+ return -1;
+ }
+
+ return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 0, 1);
+}
+
+/**
+ * i40iw_active_open_err - send event for active side cm error
+ * @cm_node: connection's node
+ * @reset: Flag to send reset or not
+ */
+static void i40iw_active_open_err(struct i40iw_cm_node *cm_node, bool reset)
+{
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->cm_core->stats_connect_errs++;
+ if (reset) {
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "%s cm_node=%p state=%d\n",
+ __func__,
+ cm_node,
+ cm_node->state);
+ atomic_inc(&cm_node->ref_count);
+ i40iw_send_reset(cm_node);
+ }
+
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+}
+
+/**
+ * i40iw_passive_open_err - handle passive side cm error
+ * @cm_node: connection's node
+ * @reset: send reset or just free cm_node
+ */
+static void i40iw_passive_open_err(struct i40iw_cm_node *cm_node, bool reset)
+{
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->cm_core->stats_passive_errs++;
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "%s cm_node=%p state =%d\n",
+ __func__,
+ cm_node,
+ cm_node->state);
+ if (reset)
+ i40iw_send_reset(cm_node);
+ else
+ i40iw_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * i40iw_event_connect_error - to create connect error event
+ * @event: cm information for connect event
+ */
+static void i40iw_event_connect_error(struct i40iw_cm_event *event)
+{
+ struct i40iw_qp *iwqp;
+ struct iw_cm_id *cm_id;
+
+ cm_id = event->cm_node->cm_id;
+ if (!cm_id)
+ return;
+
+ iwqp = cm_id->provider_data;
+
+ if (!iwqp || !iwqp->iwdev)
+ return;
+
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ i40iw_send_cm_event(event->cm_node, cm_id,
+ IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ cm_id->rem_ref(cm_id);
+ i40iw_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * i40iw_process_options
+ * @cm_node: connection's node
+ * @optionsloc: point to start of options
+ * @optionsize: size of all options
+ * @syn_packet: flag if syn packet
+ */
+static int i40iw_process_options(struct i40iw_cm_node *cm_node,
+ u8 *optionsloc,
+ u32 optionsize,
+ u32 syn_packet)
+{
+ u32 tmp;
+ u32 offset = 0;
+ union all_known_options *all_options;
+ char got_mss_option = 0;
+
+ while (offset < optionsize) {
+ all_options = (union all_known_options *)(optionsloc + offset);
+ switch (all_options->as_base.optionnum) {
+ case OPTION_NUMBER_END:
+ offset = optionsize;
+ break;
+ case OPTION_NUMBER_NONE:
+ offset += 1;
+ continue;
+ case OPTION_NUMBER_MSS:
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "%s: MSS Length: %d Offset: %d Size: %d\n",
+ __func__,
+ all_options->as_mss.length,
+ offset,
+ optionsize);
+ got_mss_option = 1;
+ if (all_options->as_mss.length != 4)
+ return -1;
+ tmp = ntohs(all_options->as_mss.mss);
+ if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
+ cm_node->tcp_cntxt.mss = tmp;
+ break;
+ case OPTION_NUMBER_WINDOW_SCALE:
+ cm_node->tcp_cntxt.snd_wscale =
+ all_options->as_windowscale.shiftcount;
+ break;
+ default:
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "TCP Option not understood: %x\n",
+ all_options->as_base.optionnum);
+ break;
+ }
+ offset += all_options->as_base.length;
+ }
+ if (!got_mss_option && syn_packet)
+ cm_node->tcp_cntxt.mss = I40IW_CM_DEFAULT_MSS;
+ return 0;
+}
+
+/**
+ * i40iw_handle_tcp_options -
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ * @optionsize: size of options rcvd
+ * @passive: active or passive flag
+ */
+static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node,
+ struct tcphdr *tcph,
+ int optionsize,
+ int passive)
+{
+ u8 *optionsloc = (u8 *)&tcph[1];
+
+ if (optionsize) {
+ if (i40iw_process_options(cm_node,
+ optionsloc,
+ optionsize,
+ (u32)tcph->syn)) {
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "%s: Node %p, Sending RESET\n",
+ __func__,
+ cm_node);
+ if (passive)
+ i40iw_passive_open_err(cm_node, true);
+ else
+ i40iw_active_open_err(cm_node, true);
+ return -1;
+ }
+ }
+
+ cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+ cm_node->tcp_cntxt.snd_wscale;
+
+ if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+ cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+ return 0;
+}
+
+/**
+ * i40iw_build_mpa_v1 - build a MPA V1 frame
+ * @cm_node: connection's node
+ * @mpa_key: to do read0 or write0
+ */
+static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node,
+ void *start_addr,
+ u8 mpa_key)
+{
+ struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+ break;
+ case MPA_KEY_REPLY:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+ break;
+ default:
+ break;
+ }
+ mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+ mpa_frame->rev = cm_node->mpa_frame_rev;
+ mpa_frame->priv_data_len = htons(cm_node->pdata.size);
+}
+
+/**
+ * i40iw_build_mpa_v2 - build a MPA V2 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: to do read0 or write0
+ */
+static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
+ void *start_addr,
+ u8 mpa_key)
+{
+ struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
+ struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+
+ /* initialize the upper 5 bytes of the frame */
+ i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
+ mpa_frame->flags |= IETF_MPA_V2_FLAG;
+ mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
+
+ /* initialize RTR msg */
+ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ rtr_msg->ctrl_ird = IETF_NO_IRD_ORD;
+ rtr_msg->ctrl_ord = IETF_NO_IRD_ORD;
+ } else {
+ rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+ IETF_NO_IRD_ORD : cm_node->ird_size;
+ rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+ IETF_NO_IRD_ORD : cm_node->ord_size;
+ }
+
+ rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
+ rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+ rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ case MPA_KEY_REPLY:
+ switch (cm_node->send_rdma0_op) {
+ case SEND_RDMA_WRITE_ZERO:
+ rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+ break;
+ case SEND_RDMA_READ_ZERO:
+ rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
+ rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+}
+
+/**
+ * i40iw_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
+ * @cm_node: connection's node
+ * @mpa: mpa: data buffer
+ * @mpa_key: to do read0 or write0
+ */
+static int i40iw_cm_build_mpa_frame(struct i40iw_cm_node *cm_node,
+ struct i40iw_kmem_info *mpa,
+ u8 mpa_key)
+{
+ int hdr_len = 0;
+
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V1:
+ hdr_len = sizeof(struct ietf_mpa_v1);
+ i40iw_build_mpa_v1(cm_node, mpa->addr, mpa_key);
+ break;
+ case IETF_MPA_V2:
+ hdr_len = sizeof(struct ietf_mpa_v2);
+ i40iw_build_mpa_v2(cm_node, mpa->addr, mpa_key);
+ break;
+ default:
+ break;
+ }
+
+ return hdr_len;
+}
+
+/**
+ * i40iw_send_mpa_request - active node send mpa request to passive node
+ * @cm_node: connection's node
+ */
+static int i40iw_send_mpa_request(struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_puda_buf *sqbuf;
+
+ if (!cm_node) {
+ i40iw_pr_err("cm_node == NULL\n");
+ return -1;
+ }
+
+ cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
+ cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
+ &cm_node->mpa_hdr,
+ MPA_KEY_REQUEST);
+ if (!cm_node->mpa_hdr.size) {
+ i40iw_pr_err("mpa size = %d\n", cm_node->mpa_hdr.size);
+ return -1;
+ }
+
+ sqbuf = i40iw_form_cm_frame(cm_node,
+ NULL,
+ &cm_node->mpa_hdr,
+ &cm_node->pdata,
+ SET_ACK);
+ if (!sqbuf) {
+ i40iw_pr_err("sq_buf == NULL\n");
+ return -1;
+ }
+ return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_send_mpa_reject -
+ * @cm_node: connection's node
+ * @pdata: reject data for connection
+ * @plen: length of reject data
+ */
+static int i40iw_send_mpa_reject(struct i40iw_cm_node *cm_node,
+ const void *pdata,
+ u8 plen)
+{
+ struct i40iw_puda_buf *sqbuf;
+ struct i40iw_kmem_info priv_info;
+
+ cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
+ cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
+ &cm_node->mpa_hdr,
+ MPA_KEY_REPLY);
+
+ cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
+ priv_info.addr = (void *)pdata;
+ priv_info.size = plen;
+
+ sqbuf = i40iw_form_cm_frame(cm_node,
+ NULL,
+ &cm_node->mpa_hdr,
+ &priv_info,
+ SET_ACK | SET_FIN);
+ if (!sqbuf) {
+ i40iw_pr_err("no sqbuf\n");
+ return -ENOMEM;
+ }
+ cm_node->state = I40IW_CM_STATE_FIN_WAIT1;
+ return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * recv_mpa - process an IETF MPA frame
+ * @cm_node: connection's node
+ * @buffer: Data pointer
+ * @type: to return accept or reject
+ * @len: Len of mpa buffer
+ */
+static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, u32 len)
+{
+ struct ietf_mpa_v1 *mpa_frame;
+ struct ietf_mpa_v2 *mpa_v2_frame;
+ struct ietf_rtr_msg *rtr_msg;
+ int mpa_hdr_len;
+ int priv_data_len;
+
+ *type = I40IW_MPA_REQUEST_ACCEPT;
+
+ if (len < sizeof(struct ietf_mpa_v1)) {
+ i40iw_pr_err("ietf buffer small (%x)\n", len);
+ return -1;
+ }
+
+ mpa_frame = (struct ietf_mpa_v1 *)buffer;
+ mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+ priv_data_len = ntohs(mpa_frame->priv_data_len);
+
+ if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+ i40iw_pr_err("large pri_data %d\n", priv_data_len);
+ return -1;
+ }
+ if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+ i40iw_pr_err("unsupported mpa rev = %d\n", mpa_frame->rev);
+ return -1;
+ }
+ if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+ i40iw_pr_err("rev %d\n", mpa_frame->rev);
+ return -1;
+ }
+ cm_node->mpa_frame_rev = mpa_frame->rev;
+
+ if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+ i40iw_pr_err("Unexpected MPA Key received\n");
+ return -1;
+ }
+ } else {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+ i40iw_pr_err("Unexpected MPA Key received\n");
+ return -1;
+ }
+ }
+
+ if (priv_data_len + mpa_hdr_len > len) {
+ i40iw_pr_err("ietf buffer len(%x + %x != %x)\n",
+ priv_data_len, mpa_hdr_len, len);
+ return -1;
+ }
+ if (len > MAX_CM_BUFFER) {
+ i40iw_pr_err("ietf buffer large len = %d\n", len);
+ return -1;
+ }
+
+ switch (mpa_frame->rev) {
+ case IETF_MPA_V2:{
+ u16 ird_size;
+ u16 ord_size;
+ u16 ctrl_ord;
+ u16 ctrl_ird;
+
+ mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
+ mpa_hdr_len += IETF_RTR_MSG_SIZE;
+ rtr_msg = &mpa_v2_frame->rtr_msg;
+
+ /* parse rtr message */
+ ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+ ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+ ird_size = ctrl_ird & IETF_NO_IRD_ORD;
+ ord_size = ctrl_ord & IETF_NO_IRD_ORD;
+
+ if (!(ctrl_ird & IETF_PEER_TO_PEER))
+ return -1;
+
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+ goto negotiate_done;
+ }
+
+ if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
+ /* responder */
+ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
+ cm_node->ird_size = 1;
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+ } else {
+ /* initiator */
+ if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
+ return -1;
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+
+ if (cm_node->ird_size < ord_size)
+ /* no resources available */
+ return -1;
+ }
+
+negotiate_done:
+ if (ctrl_ord & IETF_RDMA0_READ)
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ else if (ctrl_ord & IETF_RDMA0_WRITE)
+ cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+ else /* Not supported RDMA0 operation */
+ return -1;
+ i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+ "MPAV2: Negotiated ORD: %d, IRD: %d\n",
+ cm_node->ord_size, cm_node->ird_size);
+ break;
+ }
+ break;
+ case IETF_MPA_V1:
+ default:
+ break;
+ }
+
+ memcpy(cm_node->pdata_buf, buffer + mpa_hdr_len, priv_data_len);
+ cm_node->pdata.size = priv_data_len;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+ *type = I40IW_MPA_REQUEST_REJECT;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
+ cm_node->snd_mark_en = true;
+
+ return 0;
+}
+
+/**
+ * i40iw_schedule_cm_timer
+ * @@cm_node: connection's node
+ * @sqbuf: buffer to send
+ * @type: if it es send ot close
+ * @send_retrans: if rexmits to be done
+ * @close_when_complete: is cm_node to be removed
+ *
+ * note - cm_node needs to be protected before calling this. Encase in:
+ * i40iw_rem_ref_cm_node(cm_core, cm_node);
+ * i40iw_schedule_cm_timer(...)
+ * atomic_inc(&cm_node->ref_count);
+ */
+int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *sqbuf,
+ enum i40iw_timer_type type,
+ int send_retrans,
+ int close_when_complete)
+{
+ struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_cm_core *cm_core = cm_node->cm_core;
+ struct i40iw_timer_entry *new_send;
+ int ret = 0;
+ u32 was_timer_set;
+ unsigned long flags;
+
+ new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+ if (!new_send) {
+ i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+ return -ENOMEM;
+ }
+ new_send->retrycount = I40IW_DEFAULT_RETRYS;
+ new_send->retranscount = I40IW_DEFAULT_RETRANS;
+ new_send->sqbuf = sqbuf;
+ new_send->timetosend = jiffies;
+ new_send->type = type;
+ new_send->send_retrans = send_retrans;
+ new_send->close_when_complete = close_when_complete;
+
+ if (type == I40IW_TIMER_TYPE_CLOSE) {
+ new_send->timetosend += (HZ / 10);
+ if (cm_node->close_entry) {
+ kfree(new_send);
+ i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+ i40iw_pr_err("already close entry\n");
+ return -EINVAL;
+ }
+ cm_node->close_entry = new_send;
+ }
+
+ if (type == I40IW_TIMER_TYPE_SEND) {
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ atomic_inc(&cm_node->ref_count);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
+
+ atomic_inc(&sqbuf->refcount);
+ i40iw_puda_send_buf(dev->ilq, sqbuf);
+ if (!send_retrans) {
+ i40iw_cleanup_retrans_entry(cm_node);
+ if (close_when_complete)
+ i40iw_rem_ref_cm_node(cm_node);
+ return ret;
+ }
+ }
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+ if (!was_timer_set) {
+ cm_core->tcp_timer.expires = new_send->timetosend;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ return ret;
+}
+
+/**
+ * i40iw_retrans_expired - Could not rexmit the packet
+ * @cm_node: connection's node
+ */
+static void i40iw_retrans_expired(struct i40iw_cm_node *cm_node)
+{
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ enum i40iw_cm_node_state state = cm_node->state;
+
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ switch (state) {
+ case I40IW_CM_STATE_SYN_RCVD:
+ case I40IW_CM_STATE_CLOSING:
+ i40iw_rem_ref_cm_node(cm_node);
+ break;
+ case I40IW_CM_STATE_FIN_WAIT1:
+ case I40IW_CM_STATE_LAST_ACK:
+ if (cm_node->cm_id)
+ cm_id->rem_ref(cm_id);
+ i40iw_send_reset(cm_node);
+ break;
+ default:
+ atomic_inc(&cm_node->ref_count);
+ i40iw_send_reset(cm_node);
+ i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+ break;
+ }
+}
+
+/**
+ * i40iw_handle_close_entry - for handling retry/timeouts
+ * @cm_node: connection's node
+ * @rem_node: flag for remove cm_node
+ */
+static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node)
+{
+ struct i40iw_timer_entry *close_entry = cm_node->close_entry;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct i40iw_qp *iwqp;
+ unsigned long flags;
+
+ if (!close_entry)
+ return;
+ iwqp = (struct i40iw_qp *)close_entry->sqbuf;
+ if (iwqp) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->cm_id) {
+ iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
+ iwqp->hw_iwarp_state = I40IW_QP_STATE_ERROR;
+ iwqp->last_aeq = I40IW_AE_RESET_SENT;
+ iwqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ i40iw_cm_disconn(iwqp);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ } else if (rem_node) {
+ /* TIME_WAIT state */
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+ if (cm_id)
+ cm_id->rem_ref(cm_id);
+ kfree(close_entry);
+ cm_node->close_entry = NULL;
+}
+
+/**
+ * i40iw_cm_timer_tick - system's timer expired callback
+ * @pass: Pointing to cm_core
+ */
+static void i40iw_cm_timer_tick(unsigned long pass)
+{
+ unsigned long nexttimeout = jiffies + I40IW_LONG_TIME;
+ struct i40iw_cm_node *cm_node;
+ struct i40iw_timer_entry *send_entry, *close_entry;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
+ u32 settimer = 0;
+ unsigned long timetosend;
+ struct i40iw_sc_dev *dev;
+ unsigned long flags;
+
+ struct list_head timer_list;
+
+ INIT_LIST_HEAD(&timer_list);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ if (cm_node->close_entry || cm_node->send_entry) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->timer_entry, &timer_list);
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp, &timer_list) {
+ cm_node = container_of(list_node,
+ struct i40iw_cm_node,
+ timer_entry);
+ close_entry = cm_node->close_entry;
+
+ if (close_entry) {
+ if (time_after(close_entry->timetosend, jiffies)) {
+ if (nexttimeout > close_entry->timetosend ||
+ !settimer) {
+ nexttimeout = close_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ i40iw_handle_close_entry(cm_node, 1);
+ }
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ goto done;
+ if (time_after(send_entry->timetosend, jiffies)) {
+ if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
+ if ((nexttimeout > send_entry->timetosend) ||
+ !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ i40iw_free_retrans_entry(cm_node);
+ }
+ goto done;
+ }
+
+ if ((cm_node->state == I40IW_CM_STATE_OFFLOADED) ||
+ (cm_node->state == I40IW_CM_STATE_CLOSED)) {
+ i40iw_free_retrans_entry(cm_node);
+ goto done;
+ }
+
+ if (!send_entry->retranscount || !send_entry->retrycount) {
+ i40iw_free_retrans_entry(cm_node);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ i40iw_retrans_expired(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ goto done;
+ }
+ cm_node->cm_core->stats_pkt_retrans++;
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+ dev = cm_node->dev;
+ atomic_inc(&send_entry->sqbuf->refcount);
+ i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ if (send_entry->send_retrans) {
+ send_entry->retranscount--;
+ timetosend = (I40IW_RETRY_TIMEOUT <<
+ (I40IW_DEFAULT_RETRANS -
+ send_entry->retranscount));
+
+ send_entry->timetosend = jiffies +
+ min(timetosend, I40IW_MAX_TIMEOUT);
+ if (nexttimeout > send_entry->timetosend || !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ int close_when_complete;
+
+ close_when_complete = send_entry->close_when_complete;
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "cm_node=%p state=%d\n",
+ cm_node,
+ cm_node->state);
+ i40iw_free_retrans_entry(cm_node);
+ if (close_when_complete)
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+done:
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+
+ if (settimer) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (!timer_pending(&cm_core->tcp_timer)) {
+ cm_core->tcp_timer.expires = nexttimeout;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+}
+
+/**
+ * i40iw_send_syn - send SYN packet
+ * @cm_node: connection's node
+ * @sendack: flag to set ACK bit or not
+ */
+int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack)
+{
+ struct i40iw_puda_buf *sqbuf;
+ int flags = SET_SYN;
+ char optionsbuffer[sizeof(struct option_mss) +
+ sizeof(struct option_windowscale) +
+ sizeof(struct option_base) + TCP_OPTIONS_PADDING];
+ struct i40iw_kmem_info opts;
+
+ int optionssize = 0;
+ /* Sending MSS option */
+ union all_known_options *options;
+
+ opts.addr = optionsbuffer;
+ if (!cm_node) {
+ i40iw_pr_err("no cm_node\n");
+ return -EINVAL;
+ }
+
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_mss.optionnum = OPTION_NUMBER_MSS;
+ options->as_mss.length = sizeof(struct option_mss);
+ options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
+ optionssize += sizeof(struct option_mss);
+
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
+ options->as_windowscale.length = sizeof(struct option_windowscale);
+ options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+ optionssize += sizeof(struct option_windowscale);
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_end = OPTION_NUMBER_END;
+ optionssize += 1;
+
+ if (sendack)
+ flags |= SET_ACK;
+
+ opts.size = optionssize;
+
+ sqbuf = i40iw_form_cm_frame(cm_node, &opts, NULL, NULL, flags);
+ if (!sqbuf) {
+ i40iw_pr_err("no sqbuf\n");
+ return -1;
+ }
+ return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_send_ack - Send ACK packet
+ * @cm_node: connection's node
+ */
+static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_puda_buf *sqbuf;
+
+ sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
+ if (sqbuf)
+ i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf);
+ else
+ i40iw_pr_err("no sqbuf\n");
+}
+
+/**
+ * i40iw_send_fin - Send FIN pkt
+ * @cm_node: connection's node
+ */
+static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_puda_buf *sqbuf;
+
+ sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN);
+ if (!sqbuf) {
+ i40iw_pr_err("no sqbuf\n");
+ return -1;
+ }
+ return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_find_node - find a cm node that matches the reference cm node
+ * @cm_core: cm's core
+ * @rem_port: remote tcp port num
+ * @rem_addr: remote ip addr
+ * @loc_port: local tcp port num
+ * @loc_addr: loc ip addr
+ * @add_refcnt: flag to increment refcount of cm_node
+ */
+struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
+ u16 rem_port,
+ u32 *rem_addr,
+ u16 loc_port,
+ u32 *loc_addr,
+ bool add_refcnt)
+{
+ struct list_head *hte;
+ struct i40iw_cm_node *cm_node;
+ unsigned long flags;
+
+ hte = &cm_core->connected_nodes;
+
+ /* walk list and find cm_node associated with this session ID */
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_entry(cm_node, hte, list) {
+ if (!memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
+ (cm_node->loc_port == loc_port) &&
+ !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr)) &&
+ (cm_node->rem_port == rem_port)) {
+ if (add_refcnt)
+ atomic_inc(&cm_node->ref_count);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ return cm_node;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ /* no owner node */
+ return NULL;
+}
+
+/**
+ * i40iw_find_listener - find a cm node listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_port: listener tcp port num
+ * @dst_addr: listener ip addr
+ * @listener_state: state to match with listen node's
+ */
+static struct i40iw_cm_listener *i40iw_find_listener(
+ struct i40iw_cm_core *cm_core,
+ u32 *dst_addr,
+ u16 dst_port,
+ u16 vlan_id,
+ enum i40iw_cm_listener_state
+ listener_state)
+{
+ struct i40iw_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+ u32 listen_addr[4];
+ u16 listen_port;
+ unsigned long flags;
+
+ /* walk list and find cm_node associated with this session ID */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+ memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
+ listen_port = listen_node->loc_port;
+ /* compare node pair, return node handle if a match */
+ if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
+ !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
+ (listen_port == dst_port) &&
+ (listener_state & listen_node->listener_state)) {
+ atomic_inc(&listen_node->ref_count);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ return listen_node;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ return NULL;
+}
+
+/**
+ * i40iw_add_hte_node - add a cm node to the hash table
+ * @cm_core: cm's core
+ * @cm_node: connection's node
+ */
+static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
+ struct i40iw_cm_node *cm_node)
+{
+ struct list_head *hte;
+ unsigned long flags;
+
+ if (!cm_node || !cm_core) {
+ i40iw_pr_err("cm_node or cm_core == NULL\n");
+ return;
+ }
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+ /* get a handle on the hash table element (list head for this slot) */
+ hte = &cm_core->connected_nodes;
+ list_add_tail(&cm_node->list, hte);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+}
+
+/**
+ * listen_port_in_use - determine if port is in use
+ * @port: Listen port number
+ */
+static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+{
+ struct i40iw_cm_listener *listen_node;
+ unsigned long flags;
+ bool ret = false;
+
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+ if (listen_node->loc_port == port) {
+ ret = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ return ret;
+}
+
+/**
+ * i40iw_del_multiple_qhash - Remove qhash and child listens
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ */
+static enum i40iw_status_code i40iw_del_multiple_qhash(
+ struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cm_info,
+ struct i40iw_cm_listener *cm_parent_listen_node)
+{
+ struct i40iw_cm_listener *child_listen_node;
+ enum i40iw_status_code ret = I40IW_ERR_CONFIG;
+ struct list_head *pos, *tpos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) {
+ child_listen_node = list_entry(pos, struct i40iw_cm_listener, child_listen_list);
+ if (child_listen_node->ipv4)
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ else
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+ "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ list_del(pos);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ cm_info->vlan_id = child_listen_node->vlan_id;
+ ret = i40iw_manage_qhash(iwdev, cm_info,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "freed pointer = %p\n",
+ child_listen_node);
+ }
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+
+ return ret;
+}
+
+/**
+ * i40iw_netdev_vlan_ipv6 - Gets the netdev and mac
+ * @addr: local IPv6 address
+ * @vlan_id: vlan id for the given IPv6 address
+ * @mac: mac address for the given IPv6 address
+ *
+ * Returns the net_device of the IPv6 address and also sets the
+ * vlan id and mac for that address.
+ */
+static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+{
+ struct net_device *ip_dev = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr laddr6;
+
+ i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
+ if (vlan_id)
+ *vlan_id = I40IW_NO_VLAN;
+ if (mac)
+ eth_zero_addr(mac);
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, ip_dev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ if (ip_dev->dev_addr && mac)
+ ether_addr_copy(mac, ip_dev->dev_addr);
+ break;
+ }
+ }
+ rcu_read_unlock();
+#endif
+ return ip_dev;
+}
+
+/**
+ * i40iw_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
+ * @addr: local IPv4 address
+ */
+static u16 i40iw_get_vlan_ipv4(u32 *addr)
+{
+ struct net_device *netdev;
+ u16 vlan_id = I40IW_NO_VLAN;
+
+ netdev = ip_dev_find(&init_net, htonl(addr[0]));
+ if (netdev) {
+ vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ dev_put(netdev);
+ }
+ return vlan_id;
+}
+
+/**
+ * i40iw_add_mqh_6 - Adds multiple qhashes for IPv6
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv6 address
+ * on the adapter and adds the associated qhash filter
+ */
+static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cm_info,
+ struct i40iw_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp;
+ enum i40iw_status_code ret = 0;
+ struct i40iw_cm_listener *child_listen_node;
+ unsigned long flags;
+
+ rtnl_lock();
+ for_each_netdev_rcu(&init_net, ip_dev) {
+ if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
+ (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
+ (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ i40iw_pr_err("idev == NULL\n");
+ break;
+ }
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "IP=%pI6, vlan_id=%d, MAC=%pM\n",
+ &ifp->addr,
+ rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+ child_listen_node =
+ kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ i40iw_pr_err("listener memory allocation\n");
+ ret = I40IW_ERR_NO_MEMORY;
+ goto exit;
+ }
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+
+ memcpy(child_listen_node, cm_parent_listen_node,
+ sizeof(*child_listen_node));
+
+ i40iw_copy_ip_ntohl(child_listen_node->loc_addr,
+ ifp->addr.in6_u.u6_addr32);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+
+ ret = i40iw_manage_qhash(iwdev, cm_info,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (!ret) {
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+ } else {
+ kfree(child_listen_node);
+ }
+ }
+ }
+ }
+exit:
+ rtnl_unlock();
+ return ret;
+}
+
+/**
+ * i40iw_add_mqh_4 - Adds multiple qhashes for IPv4
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv4 address
+ * on the adapter and adds the associated qhash filter
+ */
+static enum i40iw_status_code i40iw_add_mqh_4(
+ struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cm_info,
+ struct i40iw_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *dev;
+ struct in_device *idev;
+ struct i40iw_cm_listener *child_listen_node;
+ enum i40iw_status_code ret = 0;
+ unsigned long flags;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ if ((((rdma_vlan_dev_vlan_id(dev) < I40IW_NO_VLAN) &&
+ (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
+ (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+ idev = in_dev_get(dev);
+ for_ifa(idev) {
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+ &ifa->ifa_address,
+ rdma_vlan_dev_vlan_id(dev),
+ dev->dev_addr);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ i40iw_pr_err("listener memory allocation\n");
+ in_dev_put(idev);
+ ret = I40IW_ERR_NO_MEMORY;
+ goto exit;
+ }
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+ memcpy(child_listen_node,
+ cm_parent_listen_node,
+ sizeof(*child_listen_node));
+
+ child_listen_node->loc_addr[0] = ntohl(ifa->ifa_address);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+
+ ret = i40iw_manage_qhash(iwdev,
+ cm_info,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_ADD,
+ NULL,
+ true);
+ if (!ret) {
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ } else {
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created--;
+ }
+ }
+ endfor_ifa(idev);
+ in_dev_put(idev);
+ }
+ }
+exit:
+ rtnl_unlock();
+ return ret;
+}
+
+/**
+ * i40iw_dec_refcnt_listen - delete listener and associated cm nodes
+ * @cm_core: cm's core
+ * @free_hanging_nodes: to free associated cm_nodes
+ * @apbvt_del: flag to delete the apbvt
+ */
+static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
+ struct i40iw_cm_listener *listener,
+ int free_hanging_nodes, bool apbvt_del)
+{
+ int ret = -EINVAL;
+ int err = 0;
+ struct list_head *list_pos;
+ struct list_head *list_temp;
+ struct i40iw_cm_node *cm_node;
+ struct list_head reset_list;
+ struct i40iw_cm_info nfo;
+ struct i40iw_cm_node *loopback;
+ enum i40iw_cm_node_state old_state;
+ unsigned long flags;
+
+ /* free non-accelerated child nodes for this listener */
+ INIT_LIST_HEAD(&reset_list);
+ if (free_hanging_nodes) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) {
+ cm_node = container_of(list_pos, struct i40iw_cm_node, list);
+ if ((cm_node->listener == listener) && !cm_node->accelerated) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->reset_entry, &reset_list);
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+
+ list_for_each_safe(list_pos, list_temp, &reset_list) {
+ cm_node = container_of(list_pos, struct i40iw_cm_node, reset_entry);
+ loopback = cm_node->loopbackpartner;
+ if (cm_node->state >= I40IW_CM_STATE_FIN_WAIT1) {
+ i40iw_rem_ref_cm_node(cm_node);
+ } else {
+ if (!loopback) {
+ i40iw_cleanup_retrans_entry(cm_node);
+ err = i40iw_send_reset(cm_node);
+ if (err) {
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_pr_err("send reset\n");
+ } else {
+ old_state = cm_node->state;
+ cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
+ if (old_state != I40IW_CM_STATE_MPAREQ_RCVD)
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+ } else {
+ struct i40iw_cm_event event;
+
+ event.cm_node = loopback;
+ memcpy(event.cm_info.rem_addr,
+ loopback->rem_addr, sizeof(event.cm_info.rem_addr));
+ memcpy(event.cm_info.loc_addr,
+ loopback->loc_addr, sizeof(event.cm_info.loc_addr));
+ event.cm_info.rem_port = loopback->rem_port;
+ event.cm_info.loc_port = loopback->loc_port;
+ event.cm_info.cm_id = loopback->cm_id;
+ event.cm_info.ipv4 = loopback->ipv4;
+ atomic_inc(&loopback->ref_count);
+ loopback->state = I40IW_CM_STATE_CLOSED;
+ i40iw_event_connect_error(&event);
+ cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+ }
+ }
+
+ if (!atomic_dec_return(&listener->ref_count)) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_del(&listener->list);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ if (listener->iwdev) {
+ if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+ i40iw_manage_apbvt(listener->iwdev,
+ listener->loc_port,
+ I40IW_MANAGE_APBVT_DEL);
+
+ memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
+ nfo.loc_port = listener->loc_port;
+ nfo.ipv4 = listener->ipv4;
+ nfo.vlan_id = listener->vlan_id;
+
+ if (!list_empty(&listener->child_listen_list)) {
+ i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
+ } else {
+ if (listener->qhash_set)
+ i40iw_manage_qhash(listener->iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ }
+ }
+
+ cm_core->stats_listen_destroyed++;
+ kfree(listener);
+ cm_core->stats_listen_nodes_destroyed++;
+ listener = NULL;
+ ret = 0;
+ }
+
+ if (listener) {
+ if (atomic_read(&listener->pend_accepts_cnt) > 0)
+ i40iw_debug(cm_core->dev,
+ I40IW_DEBUG_CM,
+ "%s: listener (%p) pending accepts=%u\n",
+ __func__,
+ listener,
+ atomic_read(&listener->pend_accepts_cnt));
+ }
+
+ return ret;
+}
+
+/**
+ * i40iw_cm_del_listen - delete a linstener
+ * @cm_core: cm's core
+ * @listener: passive connection's listener
+ * @apbvt_del: flag to delete apbvt
+ */
+static int i40iw_cm_del_listen(struct i40iw_cm_core *cm_core,
+ struct i40iw_cm_listener *listener,
+ bool apbvt_del)
+{
+ listener->listener_state = I40IW_CM_LISTENER_PASSIVE_STATE;
+ listener->cm_id = NULL; /* going to be destroyed pretty soon */
+ return i40iw_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
+}
+
+/**
+ * i40iw_addr_resolve_neigh - resolve neighbor address
+ * @iwdev: iwarp device structure
+ * @src_ip: local ip address
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
+ u32 src_ip,
+ u32 dst_ip,
+ int arpindex)
+{
+ struct rtable *rt;
+ struct neighbour *neigh;
+ int rc = arpindex;
+ struct net_device *netdev = iwdev->netdev;
+ __be32 dst_ipaddr = htonl(dst_ip);
+ __be32 src_ipaddr = htonl(src_ip);
+
+ rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
+ if (IS_ERR(rt)) {
+ i40iw_pr_err("ip_route_output\n");
+ return rc;
+ }
+
+ if (netif_is_bond_slave(netdev))
+ netdev = netdev_master_upper_dev_get(netdev);
+
+ neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
+
+ rcu_read_lock();
+ if (neigh) {
+ if (neigh->nud_state & NUD_VALID) {
+ if (arpindex >= 0) {
+ if (ether_addr_equal(iwdev->arp_table[arpindex].mac_addr,
+ neigh->ha))
+ /* Mac address same as arp table */
+ goto resolve_neigh_exit;
+ i40iw_manage_arp_cache(iwdev,
+ iwdev->arp_table[arpindex].mac_addr,
+ &dst_ip,
+ true,
+ I40IW_ARP_DELETE);
+ }
+
+ i40iw_manage_arp_cache(iwdev, neigh->ha, &dst_ip, true, I40IW_ARP_ADD);
+ rc = i40iw_arp_table(iwdev, &dst_ip, true, NULL, I40IW_ARP_RESOLVE);
+ } else {
+ neigh_event_send(neigh, NULL);
+ }
+ }
+ resolve_neigh_exit:
+
+ rcu_read_unlock();
+ if (neigh)
+ neigh_release(neigh);
+
+ ip_rt_put(rt);
+ return rc;
+}
+
+/**
+ * i40iw_get_dst_ipv6
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
+ struct sockaddr_in6 *dst_addr)
+{
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = dst_addr->sin6_addr;
+ fl6.saddr = src_addr->sin6_addr;
+ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = dst_addr->sin6_scope_id;
+
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ return dst;
+}
+#endif
+
+/**
+ * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
+ * @iwdev: iwarp device structure
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
+ u32 *src,
+ u32 *dest,
+ int arpindex)
+{
+ struct neighbour *neigh;
+ int rc = arpindex;
+ struct net_device *netdev = iwdev->netdev;
+ struct dst_entry *dst;
+ struct sockaddr_in6 dst_addr;
+ struct sockaddr_in6 src_addr;
+
+ memset(&dst_addr, 0, sizeof(dst_addr));
+ dst_addr.sin6_family = AF_INET6;
+ i40iw_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
+ memset(&src_addr, 0, sizeof(src_addr));
+ src_addr.sin6_family = AF_INET6;
+ i40iw_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
+ dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
+ if (!dst || dst->error) {
+ if (dst) {
+ dst_release(dst);
+ i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
+ dst->error);
+ }
+ return rc;
+ }
+
+ if (netif_is_bond_slave(netdev))
+ netdev = netdev_master_upper_dev_get(netdev);
+
+ neigh = dst_neigh_lookup(dst, &dst_addr);
+
+ rcu_read_lock();
+ if (neigh) {
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "dst_neigh_lookup MAC=%pM\n", neigh->ha);
+ if (neigh->nud_state & NUD_VALID) {
+ if (arpindex >= 0) {
+ if (ether_addr_equal
+ (iwdev->arp_table[arpindex].mac_addr,
+ neigh->ha)) {
+ /* Mac address same as in arp table */
+ goto resolve_neigh_exit6;
+ }
+ i40iw_manage_arp_cache(iwdev,
+ iwdev->arp_table[arpindex].mac_addr,
+ dest,
+ false,
+ I40IW_ARP_DELETE);
+ }
+ i40iw_manage_arp_cache(iwdev,
+ neigh->ha,
+ dest,
+ false,
+ I40IW_ARP_ADD);
+ rc = i40iw_arp_table(iwdev,
+ dest,
+ false,
+ NULL,
+ I40IW_ARP_RESOLVE);
+ } else {
+ neigh_event_send(neigh, NULL);
+ }
+ }
+
+ resolve_neigh_exit6:
+ rcu_read_unlock();
+ if (neigh)
+ neigh_release(neigh);
+ dst_release(dst);
+ return rc;
+}
+#endif
+
+/**
+ * i40iw_ipv4_is_loopback - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+static bool i40iw_ipv4_is_loopback(u32 loc_addr, u32 rem_addr)
+{
+ return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
+}
+
+/**
+ * i40iw_ipv6_is_loopback - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
+{
+ struct in6_addr raddr6;
+
+ i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
+ return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6));
+}
+
+/**
+ * i40iw_make_cm_node - create a new instance of a cm node
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ * @listener: passive connection's listener
+ */
+static struct i40iw_cm_node *i40iw_make_cm_node(
+ struct i40iw_cm_core *cm_core,
+ struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cm_info,
+ struct i40iw_cm_listener *listener)
+{
+ struct i40iw_cm_node *cm_node;
+ struct timespec ts;
+ int oldarpindex;
+ int arpindex;
+ struct net_device *netdev = iwdev->netdev;
+
+ /* create an hte and cm_node for this instance */
+ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+ if (!cm_node)
+ return NULL;
+
+ /* set our node specific transport info */
+ cm_node->ipv4 = cm_info->ipv4;
+ cm_node->vlan_id = cm_info->vlan_id;
+ memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
+ memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
+ cm_node->loc_port = cm_info->loc_port;
+ cm_node->rem_port = cm_info->rem_port;
+
+ cm_node->mpa_frame_rev = iwdev->mpa_version;
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ cm_node->ird_size = I40IW_MAX_IRD_SIZE;
+ cm_node->ord_size = I40IW_MAX_ORD_SIZE;
+
+ cm_node->listener = listener;
+ cm_node->cm_id = cm_info->cm_id;
+ ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
+ spin_lock_init(&cm_node->retrans_list_lock);
+
+ atomic_set(&cm_node->ref_count, 1);
+ /* associate our parent CM core */
+ cm_node->cm_core = cm_core;
+ cm_node->tcp_cntxt.loc_id = I40IW_CM_DEF_LOCAL_ID;
+ cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+ cm_node->tcp_cntxt.rcv_wnd =
+ I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
+ ts = current_kernel_time();
+ cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+ cm_node->tcp_cntxt.mss = iwdev->mss;
+
+ cm_node->iwdev = iwdev;
+ cm_node->dev = &iwdev->sc_dev;
+
+ if ((cm_node->ipv4 &&
+ i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+ (!cm_node->ipv4 && i40iw_ipv6_is_loopback(cm_node->loc_addr,
+ cm_node->rem_addr))) {
+ arpindex = i40iw_arp_table(iwdev,
+ cm_node->rem_addr,
+ false,
+ NULL,
+ I40IW_ARP_RESOLVE);
+ } else {
+ oldarpindex = i40iw_arp_table(iwdev,
+ cm_node->rem_addr,
+ false,
+ NULL,
+ I40IW_ARP_RESOLVE);
+ if (cm_node->ipv4)
+ arpindex = i40iw_addr_resolve_neigh(iwdev,
+ cm_info->loc_addr[0],
+ cm_info->rem_addr[0],
+ oldarpindex);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ arpindex = i40iw_addr_resolve_neigh_ipv6(iwdev,
+ cm_info->loc_addr,
+ cm_info->rem_addr,
+ oldarpindex);
+#endif
+ }
+ if (arpindex < 0) {
+ i40iw_pr_err("cm_node arpindex\n");
+ kfree(cm_node);
+ return NULL;
+ }
+ ether_addr_copy(cm_node->rem_mac, iwdev->arp_table[arpindex].mac_addr);
+ i40iw_add_hte_node(cm_core, cm_node);
+ cm_core->stats_nodes_created++;
+ return cm_node;
+}
+
+/**
+ * i40iw_rem_ref_cm_node - destroy an instance of a cm node
+ * @cm_node: connection's node
+ */
+static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_cm_core *cm_core = cm_node->cm_core;
+ struct i40iw_qp *iwqp;
+ struct i40iw_cm_info nfo;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
+ if (atomic_dec_return(&cm_node->ref_count)) {
+ spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+ return;
+ }
+ list_del(&cm_node->list);
+ spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+
+ /* if the node is destroyed before connection was accelerated */
+ if (!cm_node->accelerated && cm_node->accept_pend) {
+ pr_err("node destroyed before established\n");
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ }
+ if (cm_node->close_entry)
+ i40iw_handle_close_entry(cm_node, 0);
+ if (cm_node->listener) {
+ i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
+ } else {
+ if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) &&
+ cm_node->apbvt_set && cm_node->iwdev) {
+ i40iw_manage_apbvt(cm_node->iwdev,
+ cm_node->loc_port,
+ I40IW_MANAGE_APBVT_DEL);
+ i40iw_get_addr_info(cm_node, &nfo);
+ if (cm_node->qhash_set) {
+ i40iw_manage_qhash(cm_node->iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ cm_node->qhash_set = 0;
+ }
+ }
+ }
+
+ iwqp = cm_node->iwqp;
+ if (iwqp) {
+ iwqp->cm_node = NULL;
+ i40iw_rem_ref(&iwqp->ibqp);
+ cm_node->iwqp = NULL;
+ } else if (cm_node->qhash_set) {
+ i40iw_get_addr_info(cm_node, &nfo);
+ i40iw_manage_qhash(cm_node->iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ cm_node->qhash_set = 0;
+ }
+
+ cm_node->cm_core->stats_nodes_destroyed++;
+ kfree(cm_node);
+}
+
+/**
+ * i40iw_handle_fin_pkt - FIN packet received
+ * @cm_node: connection's node
+ */
+static void i40iw_handle_fin_pkt(struct i40iw_cm_node *cm_node)
+{
+ u32 ret;
+
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_RCVD:
+ case I40IW_CM_STATE_SYN_SENT:
+ case I40IW_CM_STATE_ESTABLISHED:
+ case I40IW_CM_STATE_MPAREJ_RCVD:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_LAST_ACK;
+ i40iw_send_fin(cm_node);
+ break;
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+ cm_node->tcp_cntxt.rcv_nxt++;
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ atomic_inc(&cm_node->ref_count);
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_FIN_WAIT1:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSING;
+ i40iw_send_ack(cm_node);
+ /*
+ * Wait for ACK as this is simultaneous close.
+ * After we receive ACK, do not send anything.
+ * Just rm the node.
+ */
+ break;
+ case I40IW_CM_STATE_FIN_WAIT2:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_TIME_WAIT;
+ i40iw_send_ack(cm_node);
+ ret =
+ i40iw_schedule_cm_timer(cm_node, NULL, I40IW_TIMER_TYPE_CLOSE, 1, 0);
+ if (ret)
+ i40iw_pr_err("node %p state = %d\n", cm_node, cm_node->state);
+ break;
+ case I40IW_CM_STATE_TIME_WAIT:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_rem_ref_cm_node(cm_node);
+ break;
+ case I40IW_CM_STATE_OFFLOADED:
+ default:
+ i40iw_pr_err("bad state node %p state = %d\n", cm_node, cm_node->state);
+ break;
+ }
+}
+
+/**
+ * i40iw_handle_rst_pkt - process received RST packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *rbuf)
+{
+ i40iw_cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_SENT:
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V2:
+ cm_node->mpa_frame_rev = IETF_MPA_V1;
+ /* send a syn and goto syn sent state */
+ cm_node->state = I40IW_CM_STATE_SYN_SENT;
+ if (i40iw_send_syn(cm_node, 0))
+ i40iw_active_open_err(cm_node, false);
+ break;
+ case IETF_MPA_V1:
+ default:
+ i40iw_active_open_err(cm_node, false);
+ break;
+ }
+ break;
+ case I40IW_CM_STATE_MPAREQ_RCVD:
+ atomic_add_return(1, &cm_node->passive_state);
+ break;
+ case I40IW_CM_STATE_ESTABLISHED:
+ case I40IW_CM_STATE_SYN_RCVD:
+ case I40IW_CM_STATE_LISTENING:
+ i40iw_pr_err("Bad state state = %d\n", cm_node->state);
+ i40iw_passive_open_err(cm_node, false);
+ break;
+ case I40IW_CM_STATE_OFFLOADED:
+ i40iw_active_open_err(cm_node, false);
+ break;
+ case I40IW_CM_STATE_CLOSED:
+ break;
+ case I40IW_CM_STATE_FIN_WAIT2:
+ case I40IW_CM_STATE_FIN_WAIT1:
+ case I40IW_CM_STATE_LAST_ACK:
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ case I40IW_CM_STATE_TIME_WAIT:
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_rem_ref_cm_node(cm_node);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * i40iw_handle_rcv_mpa - Process a recv'd mpa buffer
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *rbuf)
+{
+ int ret;
+ int datasize = rbuf->datalen;
+ u8 *dataloc = rbuf->data;
+
+ enum i40iw_cm_event_type type = I40IW_CM_EVENT_UNKNOWN;
+ u32 res_type;
+
+ ret = i40iw_parse_mpa(cm_node, dataloc, &res_type, datasize);
+ if (ret) {
+ if (cm_node->state == I40IW_CM_STATE_MPAREQ_SENT)
+ i40iw_active_open_err(cm_node, true);
+ else
+ i40iw_passive_open_err(cm_node, true);
+ return;
+ }
+
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_ESTABLISHED:
+ if (res_type == I40IW_MPA_REQUEST_REJECT)
+ i40iw_pr_err("state for reject\n");
+ cm_node->state = I40IW_CM_STATE_MPAREQ_RCVD;
+ type = I40IW_CM_EVENT_MPA_REQ;
+ i40iw_send_ack(cm_node); /* ACK received MPA request */
+ atomic_set(&cm_node->passive_state,
+ I40IW_PASSIVE_STATE_INDICATED);
+ break;
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ i40iw_cleanup_retrans_entry(cm_node);
+ if (res_type == I40IW_MPA_REQUEST_REJECT) {
+ type = I40IW_CM_EVENT_MPA_REJECT;
+ cm_node->state = I40IW_CM_STATE_MPAREJ_RCVD;
+ } else {
+ type = I40IW_CM_EVENT_CONNECTED;
+ cm_node->state = I40IW_CM_STATE_OFFLOADED;
+ i40iw_send_ack(cm_node);
+ }
+ break;
+ default:
+ pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
+ break;
+ }
+ i40iw_create_event(cm_node, type);
+}
+
+/**
+ * i40iw_indicate_pkt_err - Send up err event to cm
+ * @cm_node: connection's node
+ */
+static void i40iw_indicate_pkt_err(struct i40iw_cm_node *cm_node)
+{
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_SENT:
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ i40iw_active_open_err(cm_node, true);
+ break;
+ case I40IW_CM_STATE_ESTABLISHED:
+ case I40IW_CM_STATE_SYN_RCVD:
+ i40iw_passive_open_err(cm_node, true);
+ break;
+ case I40IW_CM_STATE_OFFLOADED:
+ default:
+ break;
+ }
+}
+
+/**
+ * i40iw_check_syn - Check for error on received syn ack
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int i40iw_check_syn(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
+{
+ int err = 0;
+
+ if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
+ err = 1;
+ i40iw_active_open_err(cm_node, true);
+ }
+ return err;
+}
+
+/**
+ * i40iw_check_seq - check seq numbers if OK
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int i40iw_check_seq(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
+{
+ int err = 0;
+ u32 seq;
+ u32 ack_seq;
+ u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+ u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ u32 rcv_wnd;
+
+ seq = ntohl(tcph->seq);
+ ack_seq = ntohl(tcph->ack_seq);
+ rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ if (ack_seq != loc_seq_num)
+ err = -1;
+ else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
+ err = -1;
+ if (err) {
+ i40iw_pr_err("seq number\n");
+ i40iw_indicate_pkt_err(cm_node);
+ }
+ return err;
+}
+
+/**
+ * i40iw_handle_syn_pkt - is for Passive node
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_syn_pkt(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
+ struct i40iw_cm_info nfo;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_SENT:
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ /* Rcvd syn on active open connection */
+ i40iw_active_open_err(cm_node, 1);
+ break;
+ case I40IW_CM_STATE_LISTENING:
+ /* Passive OPEN */
+ if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+ cm_node->listener->backlog) {
+ cm_node->cm_core->stats_backlog_drops++;
+ i40iw_passive_open_err(cm_node, false);
+ break;
+ }
+ ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (ret) {
+ i40iw_passive_open_err(cm_node, false);
+ /* drop pkt */
+ break;
+ }
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ cm_node->accept_pend = 1;
+ atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+ cm_node->state = I40IW_CM_STATE_SYN_RCVD;
+ i40iw_get_addr_info(cm_node, &nfo);
+ ret = i40iw_manage_qhash(cm_node->iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_ADD,
+ (void *)cm_node,
+ false);
+ cm_node->qhash_set = true;
+ break;
+ case I40IW_CM_STATE_CLOSED:
+ i40iw_cleanup_retrans_entry(cm_node);
+ atomic_inc(&cm_node->ref_count);
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_OFFLOADED:
+ case I40IW_CM_STATE_ESTABLISHED:
+ case I40IW_CM_STATE_FIN_WAIT1:
+ case I40IW_CM_STATE_FIN_WAIT2:
+ case I40IW_CM_STATE_MPAREQ_RCVD:
+ case I40IW_CM_STATE_LAST_ACK:
+ case I40IW_CM_STATE_CLOSING:
+ case I40IW_CM_STATE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/**
+ * i40iw_handle_synack_pkt - Process SYN+ACK packet (active side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_synack_pkt(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_SENT:
+ i40iw_cleanup_retrans_entry(cm_node);
+ /* active open */
+ if (i40iw_check_syn(cm_node, tcph)) {
+ i40iw_pr_err("check syn fail\n");
+ return;
+ }
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 0);
+ if (ret) {
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ i40iw_send_ack(cm_node); /* ACK for the syn_ack */
+ ret = i40iw_send_mpa_request(cm_node);
+ if (ret) {
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "cm_node=%p i40iw_send_mpa_request failed\n",
+ cm_node);
+ break;
+ }
+ cm_node->state = I40IW_CM_STATE_MPAREQ_SENT;
+ break;
+ case I40IW_CM_STATE_MPAREQ_RCVD:
+ i40iw_passive_open_err(cm_node, true);
+ break;
+ case I40IW_CM_STATE_LISTENING:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_CLOSED:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ i40iw_cleanup_retrans_entry(cm_node);
+ atomic_inc(&cm_node->ref_count);
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_ESTABLISHED:
+ case I40IW_CM_STATE_FIN_WAIT1:
+ case I40IW_CM_STATE_FIN_WAIT2:
+ case I40IW_CM_STATE_LAST_ACK:
+ case I40IW_CM_STATE_OFFLOADED:
+ case I40IW_CM_STATE_CLOSING:
+ case I40IW_CM_STATE_UNKNOWN:
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ default:
+ break;
+ }
+}
+
+/**
+ * i40iw_handle_ack_pkt - process packet with ACK
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static int i40iw_handle_ack_pkt(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 inc_sequence;
+ int ret = 0;
+ int optionsize;
+ u32 datasize = rbuf->datalen;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+ if (i40iw_check_seq(cm_node, tcph))
+ return -EINVAL;
+
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_RCVD:
+ i40iw_cleanup_retrans_entry(cm_node);
+ ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (ret)
+ break;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ cm_node->state = I40IW_CM_STATE_ESTABLISHED;
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ i40iw_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case I40IW_CM_STATE_ESTABLISHED:
+ i40iw_cleanup_retrans_entry(cm_node);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ i40iw_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ i40iw_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case I40IW_CM_STATE_LISTENING:
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_CLOSED:
+ i40iw_cleanup_retrans_entry(cm_node);
+ atomic_inc(&cm_node->ref_count);
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_LAST_ACK:
+ case I40IW_CM_STATE_CLOSING:
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ if (!cm_node->accept_pend)
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ i40iw_rem_ref_cm_node(cm_node);
+ break;
+ case I40IW_CM_STATE_FIN_WAIT1:
+ i40iw_cleanup_retrans_entry(cm_node);
+ cm_node->state = I40IW_CM_STATE_FIN_WAIT2;
+ break;
+ case I40IW_CM_STATE_SYN_SENT:
+ case I40IW_CM_STATE_FIN_WAIT2:
+ case I40IW_CM_STATE_OFFLOADED:
+ case I40IW_CM_STATE_MPAREQ_RCVD:
+ case I40IW_CM_STATE_UNKNOWN:
+ default:
+ i40iw_cleanup_retrans_entry(cm_node);
+ break;
+ }
+ return ret;
+}
+
+/**
+ * i40iw_process_packet - process cm packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_process_packet(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *rbuf)
+{
+ enum i40iw_tcpip_pkt_type pkt_type = I40IW_PKT_TYPE_UNKNOWN;
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 fin_set = 0;
+ int ret;
+
+ if (tcph->rst) {
+ pkt_type = I40IW_PKT_TYPE_RST;
+ } else if (tcph->syn) {
+ pkt_type = I40IW_PKT_TYPE_SYN;
+ if (tcph->ack)
+ pkt_type = I40IW_PKT_TYPE_SYNACK;
+ } else if (tcph->ack) {
+ pkt_type = I40IW_PKT_TYPE_ACK;
+ }
+ if (tcph->fin)
+ fin_set = 1;
+
+ switch (pkt_type) {
+ case I40IW_PKT_TYPE_SYN:
+ i40iw_handle_syn_pkt(cm_node, rbuf);
+ break;
+ case I40IW_PKT_TYPE_SYNACK:
+ i40iw_handle_synack_pkt(cm_node, rbuf);
+ break;
+ case I40IW_PKT_TYPE_ACK:
+ ret = i40iw_handle_ack_pkt(cm_node, rbuf);
+ if (fin_set && !ret)
+ i40iw_handle_fin_pkt(cm_node);
+ break;
+ case I40IW_PKT_TYPE_RST:
+ i40iw_handle_rst_pkt(cm_node, rbuf);
+ break;
+ default:
+ if (fin_set &&
+ (!i40iw_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
+ i40iw_handle_fin_pkt(cm_node);
+ break;
+ }
+}
+
+/**
+ * i40iw_make_listen_node - create a listen node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ */
+static struct i40iw_cm_listener *i40iw_make_listen_node(
+ struct i40iw_cm_core *cm_core,
+ struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cm_info)
+{
+ struct i40iw_cm_listener *listener;
+ unsigned long flags;
+
+ /* cannot have multiple matching listeners */
+ listener = i40iw_find_listener(cm_core, cm_info->loc_addr,
+ cm_info->loc_port,
+ cm_info->vlan_id,
+ I40IW_CM_LISTENER_EITHER_STATE);
+ if (listener &&
+ (listener->listener_state == I40IW_CM_LISTENER_ACTIVE_STATE)) {
+ atomic_dec(&listener->ref_count);
+ i40iw_debug(cm_core->dev,
+ I40IW_DEBUG_CM,
+ "Not creating listener since it already exists\n");
+ return NULL;
+ }
+
+ if (!listener) {
+ /* create a CM listen node (1/2 node to compare incoming traffic to) */
+ listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
+ if (!listener)
+ return NULL;
+ cm_core->stats_listen_nodes_created++;
+ memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr));
+ listener->loc_port = cm_info->loc_port;
+
+ INIT_LIST_HEAD(&listener->child_listen_list);
+
+ atomic_set(&listener->ref_count, 1);
+ } else {
+ listener->reused_node = 1;
+ }
+
+ listener->cm_id = cm_info->cm_id;
+ listener->ipv4 = cm_info->ipv4;
+ listener->vlan_id = cm_info->vlan_id;
+ atomic_set(&listener->pend_accepts_cnt, 0);
+ listener->cm_core = cm_core;
+ listener->iwdev = iwdev;
+
+ listener->backlog = cm_info->backlog;
+ listener->listener_state = I40IW_CM_LISTENER_ACTIVE_STATE;
+
+ if (!listener->reused_node) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_add(&listener->list, &cm_core->listen_nodes);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ }
+
+ return listener;
+}
+
+/**
+ * i40iw_create_cm_node - make a connection node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @private_data_len: len to provate data for mpa request
+ * @private_data: pointer to private data for connection
+ * @cm_info: quad info for connection
+ */
+static struct i40iw_cm_node *i40iw_create_cm_node(
+ struct i40iw_cm_core *cm_core,
+ struct i40iw_device *iwdev,
+ u16 private_data_len,
+ void *private_data,
+ struct i40iw_cm_info *cm_info)
+{
+ int ret;
+ struct i40iw_cm_node *cm_node;
+ struct i40iw_cm_listener *loopback_remotelistener;
+ struct i40iw_cm_node *loopback_remotenode;
+ struct i40iw_cm_info loopback_cm_info;
+
+ /* create a CM connection node */
+ cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
+ if (!cm_node)
+ return NULL;
+ /* set our node side to client (active) side */
+ cm_node->tcp_cntxt.client = 1;
+ cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+
+ if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
+ loopback_remotelistener = i40iw_find_listener(
+ cm_core,
+ cm_info->rem_addr,
+ cm_node->rem_port,
+ cm_node->vlan_id,
+ I40IW_CM_LISTENER_ACTIVE_STATE);
+ if (!loopback_remotelistener) {
+ i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+ } else {
+ loopback_cm_info = *cm_info;
+ loopback_cm_info.loc_port = cm_info->rem_port;
+ loopback_cm_info.rem_port = cm_info->loc_port;
+ loopback_cm_info.cm_id = loopback_remotelistener->cm_id;
+ loopback_cm_info.ipv4 = cm_info->ipv4;
+ loopback_remotenode = i40iw_make_cm_node(cm_core,
+ iwdev,
+ &loopback_cm_info,
+ loopback_remotelistener);
+ if (!loopback_remotenode) {
+ i40iw_rem_ref_cm_node(cm_node);
+ return NULL;
+ }
+ cm_core->stats_loopbacks++;
+ loopback_remotenode->loopbackpartner = cm_node;
+ loopback_remotenode->tcp_cntxt.rcv_wscale =
+ I40IW_CM_DEFAULT_RCV_WND_SCALE;
+ cm_node->loopbackpartner = loopback_remotenode;
+ memcpy(loopback_remotenode->pdata_buf, private_data,
+ private_data_len);
+ loopback_remotenode->pdata.size = private_data_len;
+
+ cm_node->state = I40IW_CM_STATE_OFFLOADED;
+ cm_node->tcp_cntxt.rcv_nxt =
+ loopback_remotenode->tcp_cntxt.loc_seq_num;
+ loopback_remotenode->tcp_cntxt.rcv_nxt =
+ cm_node->tcp_cntxt.loc_seq_num;
+ cm_node->tcp_cntxt.max_snd_wnd =
+ loopback_remotenode->tcp_cntxt.rcv_wnd;
+ loopback_remotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wnd = loopback_remotenode->tcp_cntxt.rcv_wnd;
+ loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale;
+ loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
+ loopback_remotenode->state = I40IW_CM_STATE_MPAREQ_RCVD;
+ i40iw_create_event(loopback_remotenode, I40IW_CM_EVENT_MPA_REQ);
+ }
+ return cm_node;
+ }
+
+ cm_node->pdata.size = private_data_len;
+ cm_node->pdata.addr = cm_node->pdata_buf;
+
+ memcpy(cm_node->pdata_buf, private_data, private_data_len);
+
+ cm_node->state = I40IW_CM_STATE_SYN_SENT;
+ ret = i40iw_send_syn(cm_node, 0);
+
+ if (ret) {
+ if (cm_node->ipv4)
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI4",
+ cm_node->rem_addr);
+ else
+ i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI6",
+ cm_node->rem_addr);
+ i40iw_rem_ref_cm_node(cm_node);
+ cm_node = NULL;
+ }
+
+ if (cm_node)
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_port,
+ cm_node,
+ cm_node->cm_id);
+
+ return cm_node;
+}
+
+/**
+ * i40iw_cm_reject - reject and teardown a connection
+ * @cm_node: connection's node
+ * @pdate: ptr to private data for reject
+ * @plen: size of private data
+ */
+static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen)
+{
+ int ret = 0;
+ int err;
+ int passive_state;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct i40iw_cm_node *loopback = cm_node->loopbackpartner;
+
+ if (cm_node->tcp_cntxt.client)
+ return ret;
+ i40iw_cleanup_retrans_entry(cm_node);
+
+ if (!loopback) {
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == I40IW_SEND_RESET_EVENT) {
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ i40iw_rem_ref_cm_node(cm_node);
+ } else {
+ if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+ i40iw_rem_ref_cm_node(cm_node);
+ } else {
+ ret = i40iw_send_mpa_reject(cm_node, pdata, plen);
+ if (ret) {
+ cm_node->state = I40IW_CM_STATE_CLOSED;
+ err = i40iw_send_reset(cm_node);
+ if (err)
+ i40iw_pr_err("send reset failed\n");
+ } else {
+ cm_id->add_ref(cm_id);
+ }
+ }
+ }
+ } else {
+ cm_node->cm_id = NULL;
+ if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+ i40iw_rem_ref_cm_node(cm_node);
+ i40iw_rem_ref_cm_node(loopback);
+ } else {
+ ret = i40iw_send_cm_event(loopback,
+ loopback->cm_id,
+ IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNREFUSED);
+ i40iw_rem_ref_cm_node(cm_node);
+ loopback->state = I40IW_CM_STATE_CLOSING;
+
+ cm_id = loopback->cm_id;
+ i40iw_rem_ref_cm_node(loopback);
+ cm_id->rem_ref(cm_id);
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * i40iw_cm_close - close of cm connection
+ * @cm_node: connection's node
+ */
+static int i40iw_cm_close(struct i40iw_cm_node *cm_node)
+{
+ int ret = 0;
+
+ if (!cm_node)
+ return -EINVAL;
+
+ switch (cm_node->state) {
+ case I40IW_CM_STATE_SYN_RCVD:
+ case I40IW_CM_STATE_SYN_SENT:
+ case I40IW_CM_STATE_ONE_SIDE_ESTABLISHED:
+ case I40IW_CM_STATE_ESTABLISHED:
+ case I40IW_CM_STATE_ACCEPTING:
+ case I40IW_CM_STATE_MPAREQ_SENT:
+ case I40IW_CM_STATE_MPAREQ_RCVD:
+ i40iw_cleanup_retrans_entry(cm_node);
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_CLOSE_WAIT:
+ cm_node->state = I40IW_CM_STATE_LAST_ACK;
+ i40iw_send_fin(cm_node);
+ break;
+ case I40IW_CM_STATE_FIN_WAIT1:
+ case I40IW_CM_STATE_FIN_WAIT2:
+ case I40IW_CM_STATE_LAST_ACK:
+ case I40IW_CM_STATE_TIME_WAIT:
+ case I40IW_CM_STATE_CLOSING:
+ ret = -1;
+ break;
+ case I40IW_CM_STATE_LISTENING:
+ i40iw_cleanup_retrans_entry(cm_node);
+ i40iw_send_reset(cm_node);
+ break;
+ case I40IW_CM_STATE_MPAREJ_RCVD:
+ case I40IW_CM_STATE_UNKNOWN:
+ case I40IW_CM_STATE_INITED:
+ case I40IW_CM_STATE_CLOSED:
+ case I40IW_CM_STATE_LISTENER_DESTROYED:
+ i40iw_rem_ref_cm_node(cm_node);
+ break;
+ case I40IW_CM_STATE_OFFLOADED:
+ if (cm_node->send_entry)
+ i40iw_pr_err("send_entry\n");
+ i40iw_rem_ref_cm_node(cm_node);
+ break;
+ }
+ return ret;
+}
+
+/**
+ * i40iw_receive_ilq - recv an ETHERNET packet, and process it
+ * through CM
+ * @dev: FPK dev struct
+ * @rbuf: receive buffer
+ */
+void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
+{
+ struct i40iw_cm_node *cm_node;
+ struct i40iw_cm_listener *listener;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ struct i40iw_cm_info cm_info;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ struct vlan_ethhdr *ethh;
+
+ /* if vlan, then maclen = 18 else 14 */
+ iph = (struct iphdr *)rbuf->iph;
+ memset(&cm_info, 0, sizeof(cm_info));
+
+ i40iw_debug_buf(dev,
+ I40IW_DEBUG_ILQ,
+ "RECEIVE ILQ BUFFER",
+ rbuf->mem.va,
+ rbuf->totallen);
+ ethh = (struct vlan_ethhdr *)rbuf->mem.va;
+
+ if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
+ cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
+ i40iw_debug(cm_core->dev,
+ I40IW_DEBUG_CM,
+ "%s vlan_id=%d\n",
+ __func__,
+ cm_info.vlan_id);
+ } else {
+ cm_info.vlan_id = I40IW_NO_VLAN;
+ }
+ tcph = (struct tcphdr *)rbuf->tcph;
+
+ if (rbuf->ipv4) {
+ cm_info.loc_addr[0] = ntohl(iph->daddr);
+ cm_info.rem_addr[0] = ntohl(iph->saddr);
+ cm_info.ipv4 = true;
+ } else {
+ ip6h = (struct ipv6hdr *)rbuf->iph;
+ i40iw_copy_ip_ntohl(cm_info.loc_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ i40iw_copy_ip_ntohl(cm_info.rem_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ cm_info.ipv4 = false;
+ }
+ cm_info.loc_port = ntohs(tcph->dest);
+ cm_info.rem_port = ntohs(tcph->source);
+ cm_node = i40iw_find_node(cm_core,
+ cm_info.rem_port,
+ cm_info.rem_addr,
+ cm_info.loc_port,
+ cm_info.loc_addr,
+ true);
+
+ if (!cm_node) {
+ /* Only type of packet accepted are for */
+ /* the PASSIVE open (syn only) */
+ if (!tcph->syn || tcph->ack)
+ return;
+ listener =
+ i40iw_find_listener(cm_core,
+ cm_info.loc_addr,
+ cm_info.loc_port,
+ cm_info.vlan_id,
+ I40IW_CM_LISTENER_ACTIVE_STATE);
+ if (!listener) {
+ cm_info.cm_id = NULL;
+ i40iw_debug(cm_core->dev,
+ I40IW_DEBUG_CM,
+ "%s no listener found\n",
+ __func__);
+ return;
+ }
+ cm_info.cm_id = listener->cm_id;
+ cm_node = i40iw_make_cm_node(cm_core, iwdev, &cm_info, listener);
+ if (!cm_node) {
+ i40iw_debug(cm_core->dev,
+ I40IW_DEBUG_CM,
+ "%s allocate node failed\n",
+ __func__);
+ atomic_dec(&listener->ref_count);
+ return;
+ }
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = I40IW_CM_STATE_LISTENING;
+ } else {
+ i40iw_rem_ref_cm_node(cm_node);
+ return;
+ }
+ atomic_inc(&cm_node->ref_count);
+ } else if (cm_node->state == I40IW_CM_STATE_OFFLOADED) {
+ i40iw_rem_ref_cm_node(cm_node);
+ return;
+ }
+ i40iw_process_packet(cm_node, rbuf);
+ i40iw_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * i40iw_setup_cm_core - allocate a top level instance of a cm
+ * core
+ * @iwdev: iwarp device structure
+ */
+void i40iw_setup_cm_core(struct i40iw_device *iwdev)
+{
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+
+ cm_core->iwdev = iwdev;
+ cm_core->dev = &iwdev->sc_dev;
+
+ INIT_LIST_HEAD(&cm_core->connected_nodes);
+ INIT_LIST_HEAD(&cm_core->listen_nodes);
+
+ init_timer(&cm_core->tcp_timer);
+ cm_core->tcp_timer.function = i40iw_cm_timer_tick;
+ cm_core->tcp_timer.data = (unsigned long)cm_core;
+
+ spin_lock_init(&cm_core->ht_lock);
+ spin_lock_init(&cm_core->listen_list_lock);
+
+ cm_core->event_wq = create_singlethread_workqueue("iwewq");
+ cm_core->disconn_wq = create_singlethread_workqueue("iwdwq");
+}
+
+/**
+ * i40iw_cleanup_cm_core - deallocate a top level instance of a
+ * cm core
+ * @cm_core: cm's core
+ */
+void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core)
+{
+ unsigned long flags;
+
+ if (!cm_core)
+ return;
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (timer_pending(&cm_core->tcp_timer))
+ del_timer_sync(&cm_core->tcp_timer);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ destroy_workqueue(cm_core->event_wq);
+ destroy_workqueue(cm_core->disconn_wq);
+}
+
+/**
+ * i40iw_init_tcp_ctx - setup qp context
+ * @cm_node: connection's node
+ * @tcp_info: offload info for tcp
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
+ struct i40iw_tcp_offload_info *tcp_info,
+ struct i40iw_qp *iwqp)
+{
+ tcp_info->ipv4 = cm_node->ipv4;
+ tcp_info->drop_ooo_seg = true;
+ tcp_info->wscale = true;
+ tcp_info->ignore_tcp_opt = true;
+ tcp_info->ignore_tcp_uns_opt = true;
+ tcp_info->no_nagle = false;
+
+ tcp_info->ttl = I40IW_DEFAULT_TTL;
+ tcp_info->rtt_var = cpu_to_le32(I40IW_DEFAULT_RTT_VAR);
+ tcp_info->ss_thresh = cpu_to_le32(I40IW_DEFAULT_SS_THRESH);
+ tcp_info->rexmit_thresh = I40IW_DEFAULT_REXMIT_THRESH;
+
+ tcp_info->tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+ tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
+ tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
+
+ tcp_info->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ tcp_info->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
+ tcp_info->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+ tcp_info->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+
+ tcp_info->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ tcp_info->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
+ tcp_info->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+ tcp_info->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ tcp_info->max_snd_window = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
+ tcp_info->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
+ cm_node->tcp_cntxt.rcv_wscale);
+
+ tcp_info->flow_label = 0;
+ tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
+ if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+ tcp_info->insert_vlan_tag = true;
+ tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
+ }
+ if (cm_node->ipv4) {
+ tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
+ tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
+
+ tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
+ tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
+ tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev,
+ &tcp_info->dest_ip_addr3,
+ true,
+ NULL,
+ I40IW_ARP_RESOLVE));
+ } else {
+ tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
+ tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
+ tcp_info->dest_ip_addr0 = cpu_to_le32(cm_node->rem_addr[0]);
+ tcp_info->dest_ip_addr1 = cpu_to_le32(cm_node->rem_addr[1]);
+ tcp_info->dest_ip_addr2 = cpu_to_le32(cm_node->rem_addr[2]);
+ tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[3]);
+ tcp_info->local_ipaddr0 = cpu_to_le32(cm_node->loc_addr[0]);
+ tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
+ tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
+ tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
+ tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(
+ iwqp->iwdev,
+ &tcp_info->dest_ip_addr0,
+ false,
+ NULL,
+ I40IW_ARP_RESOLVE));
+ }
+}
+
+/**
+ * i40iw_cm_init_tsa_conn - setup qp for RTS
+ * @iwqp: associate qp for the connection
+ * @cm_node: connection's node
+ */
+static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
+ struct i40iw_cm_node *cm_node)
+{
+ struct i40iw_tcp_offload_info tcp_info;
+ struct i40iwarp_offload_info *iwarp_info;
+ struct i40iw_qp_host_ctx_info *ctx_info;
+ struct i40iw_device *iwdev = iwqp->iwdev;
+ struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
+
+ memset(&tcp_info, 0x00, sizeof(struct i40iw_tcp_offload_info));
+ iwarp_info = &iwqp->iwarp_info;
+ ctx_info = &iwqp->ctx_info;
+
+ ctx_info->tcp_info = &tcp_info;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+ iwarp_info->ord_size = cm_node->ord_size;
+ iwarp_info->ird_size = i40iw_derive_hw_ird_setting(cm_node->ird_size);
+
+ if (iwarp_info->ord_size == 1)
+ iwarp_info->ord_size = 2;
+
+ iwarp_info->rd_enable = true;
+ iwarp_info->rdmap_ver = 1;
+ iwarp_info->ddp_ver = 1;
+
+ iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
+
+ ctx_info->tcp_info_valid = true;
+ ctx_info->iwarp_info_valid = true;
+
+ i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
+ if (cm_node->snd_mark_en) {
+ iwarp_info->snd_mark_en = true;
+ iwarp_info->snd_mark_offset = (tcp_info.snd_nxt &
+ SNDMARKER_SEQNMASK) + cm_node->lsmm_size;
+ }
+
+ cm_node->state = I40IW_CM_STATE_OFFLOADED;
+ tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+ tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
+
+ dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
+
+ /* once tcp_info is set, no need to do it again */
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+}
+
+/**
+ * i40iw_cm_disconn - when a connection is being closed
+ * @iwqp: associate qp for the connection
+ */
+int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+{
+ struct disconn_work *work;
+ struct i40iw_device *iwdev = iwqp->iwdev;
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM; /* Timer will clean up */
+
+ i40iw_add_ref(&iwqp->ibqp);
+ work->iwqp = iwqp;
+ INIT_WORK(&work->work, i40iw_disconnect_worker);
+ queue_work(cm_core->disconn_wq, &work->work);
+ return 0;
+}
+
+/**
+ * i40iw_loopback_nop - Send a nop
+ * @qp: associated hw qp
+ */
+static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = qp->qp_uk.sq_base->elem;
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+ LS_64(0, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+ set_64bit_val(wqe, 24, header);
+}
+
+/**
+ * i40iw_qp_disconnect - free qp and close cm
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
+{
+ struct i40iw_device *iwdev;
+ struct i40iw_ib_device *iwibdev;
+
+ iwdev = to_iwdev(iwqp->ibqp.device);
+ if (!iwdev) {
+ i40iw_pr_err("iwdev == NULL\n");
+ return;
+ }
+
+ iwibdev = iwdev->iwibdev;
+
+ if (iwqp->active_conn) {
+ /* indicate this connection is NOT active */
+ iwqp->active_conn = 0;
+ } else {
+ /* Need to free the Last Streaming Mode Message */
+ if (iwqp->ietf_mem.va) {
+ if (iwqp->lsmm_mr)
+ iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr);
+ i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
+ }
+ }
+
+ /* close the CM node down if it is still active */
+ if (iwqp->cm_node) {
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "%s Call close API\n", __func__);
+ i40iw_cm_close(iwqp->cm_node);
+ }
+}
+
+/**
+ * i40iw_cm_disconn_true - called by worker thread to disconnect qp
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
+{
+ struct iw_cm_id *cm_id;
+ struct i40iw_device *iwdev;
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+ u16 last_ae;
+ u8 original_hw_tcp_state;
+ u8 original_ibqp_state;
+ int disconn_status = 0;
+ int issue_disconn = 0;
+ int issue_close = 0;
+ int issue_flush = 0;
+ struct ib_event ibevent;
+ unsigned long flags;
+ int ret;
+
+ if (!iwqp) {
+ i40iw_pr_err("iwqp == NULL\n");
+ return;
+ }
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ cm_id = iwqp->cm_id;
+ /* make sure we havent already closed this connection */
+ if (!cm_id) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ return;
+ }
+
+ iwdev = to_iwdev(iwqp->ibqp.device);
+
+ original_hw_tcp_state = iwqp->hw_tcp_state;
+ original_ibqp_state = iwqp->ibqp_state;
+ last_ae = iwqp->last_aeq;
+
+ if (qp->term_flags) {
+ issue_disconn = 1;
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ /*When term timer expires after cm_timer, don't want
+ *terminate-handler to issue cm_disconn which can re-free
+ *a QP even after its refcnt=0.
+ */
+ del_timer(&iwqp->terminate_timer);
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ } else if ((original_hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+ issue_disconn = 1;
+ if (last_ae == I40IW_AE_LLP_CONNECTION_RESET)
+ disconn_status = -ECONNRESET;
+ }
+
+ if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
+ (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
+ (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
+ (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (issue_flush && !iwqp->destroyed) {
+ /* Flush the queues */
+ i40iw_flush_wqes(iwdev, iwqp);
+
+ if (qp->term_flags) {
+ ibevent.device = iwqp->ibqp.device;
+ ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
+ IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
+ ibevent.element.qp = &iwqp->ibqp;
+ iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
+ }
+ }
+
+ if (cm_id && cm_id->event_handler) {
+ if (issue_disconn) {
+ ret = i40iw_send_cm_event(NULL,
+ cm_id,
+ IW_CM_EVENT_DISCONNECT,
+ disconn_status);
+
+ if (ret)
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "disconnect event failed %s: - cm_id = %p\n",
+ __func__, cm_id);
+ }
+ if (issue_close) {
+ i40iw_qp_disconnect(iwqp);
+ cm_id->provider_data = iwqp;
+ ret = i40iw_send_cm_event(NULL, cm_id, IW_CM_EVENT_CLOSE, 0);
+ if (ret)
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "close event failed %s: - cm_id = %p\n",
+ __func__, cm_id);
+ cm_id->rem_ref(cm_id);
+ }
+ }
+}
+
+/**
+ * i40iw_disconnect_worker - worker for connection close
+ * @work: points or disconn structure
+ */
+static void i40iw_disconnect_worker(struct work_struct *work)
+{
+ struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+ struct i40iw_qp *iwqp = dwork->iwqp;
+
+ kfree(dwork);
+ i40iw_cm_disconn_true(iwqp);
+ i40iw_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * i40iw_accept - registered call for connection to be accepted
+ * @cm_id: cm information for passive connection
+ * @conn_param: accpet parameters
+ */
+int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct i40iw_qp *iwqp;
+ struct i40iw_device *iwdev;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_cm_node *cm_node;
+ struct ib_qp_attr attr;
+ int passive_state;
+ struct i40iw_ib_device *iwibdev;
+ struct ib_mr *ibmr;
+ struct i40iw_pd *iwpd;
+ u16 buf_len = 0;
+ struct i40iw_kmem_info accept;
+ enum i40iw_status_code status;
+ u64 tagged_offset;
+
+ memset(&attr, 0, sizeof(attr));
+ ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+
+ iwqp = to_iwqp(ibqp);
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->sc_dev;
+ cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
+
+ if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
+ cm_node->ipv4 = true;
+ cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr);
+ } else {
+ cm_node->ipv4 = false;
+ i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL);
+ }
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Accept vlan_id=%d\n",
+ cm_node->vlan_id);
+ if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+ if (cm_node->loopbackpartner)
+ i40iw_rem_ref_cm_node(cm_node->loopbackpartner);
+ i40iw_rem_ref_cm_node(cm_node);
+ return -EINVAL;
+ }
+
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == I40IW_SEND_RESET_EVENT) {
+ i40iw_rem_ref_cm_node(cm_node);
+ return -ECONNRESET;
+ }
+
+ cm_node->cm_core->stats_accepts++;
+ iwqp->cm_node = (void *)cm_node;
+ cm_node->iwqp = iwqp;
+
+ buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN;
+
+ status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
+
+ if (status)
+ return -ENOMEM;
+ cm_node->pdata.size = conn_param->private_data_len;
+ accept.addr = iwqp->ietf_mem.va;
+ accept.size = i40iw_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
+ memcpy(accept.addr + accept.size, conn_param->private_data,
+ conn_param->private_data_len);
+
+ /* setup our first outgoing iWarp send WQE (the IETF frame response) */
+ if ((cm_node->ipv4 &&
+ !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+ (!cm_node->ipv4 &&
+ !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
+ iwibdev = iwdev->iwibdev;
+ iwpd = iwqp->iwpd;
+ tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
+ ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
+ iwqp->ietf_mem.pa,
+ buf_len,
+ IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (IS_ERR(ibmr)) {
+ i40iw_free_dma_mem(dev->hw, &iwqp->ietf_mem);
+ return -ENOMEM;
+ }
+
+ ibmr->pd = &iwpd->ibpd;
+ ibmr->device = iwpd->ibpd.device;
+ iwqp->lsmm_mr = ibmr;
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+ if (is_remote_ne020_or_chelsio(cm_node))
+ dev->iw_priv_qp_ops->qp_send_lsmm(
+ &iwqp->sc_qp,
+ iwqp->ietf_mem.va,
+ (accept.size + conn_param->private_data_len),
+ ibmr->lkey);
+ else
+ dev->iw_priv_qp_ops->qp_send_lsmm(
+ &iwqp->sc_qp,
+ iwqp->ietf_mem.va,
+ (accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN),
+ ibmr->lkey);
+
+ } else {
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+ i40iw_loopback_nop(&iwqp->sc_qp);
+ }
+
+ if (iwqp->page)
+ kunmap(iwqp->page);
+
+ iwqp->cm_id = cm_id;
+ cm_node->cm_id = cm_id;
+
+ cm_id->provider_data = (void *)iwqp;
+ iwqp->active_conn = 0;
+
+ cm_node->lsmm_size = accept.size + conn_param->private_data_len;
+ i40iw_cm_init_tsa_conn(iwqp, cm_node);
+ cm_id->add_ref(cm_id);
+ i40iw_add_ref(&iwqp->ibqp);
+
+ i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (cm_node->loopbackpartner) {
+ cm_node->loopbackpartner->pdata.size = conn_param->private_data_len;
+
+ /* copy entire MPA frame to our cm_node's frame */
+ memcpy(cm_node->loopbackpartner->pdata_buf,
+ conn_param->private_data,
+ conn_param->private_data_len);
+ i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED);
+ }
+
+ cm_node->accelerated = 1;
+ if (cm_node->accept_pend) {
+ if (!cm_node->listener)
+ i40iw_pr_err("cm_node->listener NULL for passive node\n");
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+ return 0;
+}
+
+/**
+ * i40iw_reject - registered call for connection to be rejected
+ * @cm_id: cm information for passive connection
+ * @pdata: private data to be sent
+ * @pdata_len: private data length
+ */
+int i40iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ struct i40iw_device *iwdev;
+ struct i40iw_cm_node *cm_node;
+ struct i40iw_cm_node *loopback;
+
+ cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
+ loopback = cm_node->loopbackpartner;
+ cm_node->cm_id = cm_id;
+ cm_node->pdata.size = pdata_len;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+ cm_node->cm_core->stats_rejects++;
+
+ if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
+ return -EINVAL;
+
+ if (loopback) {
+ memcpy(&loopback->pdata_buf, pdata, pdata_len);
+ loopback->pdata.size = pdata_len;
+ }
+
+ return i40iw_cm_reject(cm_node, pdata, pdata_len);
+}
+
+/**
+ * i40iw_connect - registered call for connection to be established
+ * @cm_id: cm information for passive connection
+ * @conn_param: Information about the connection
+ */
+int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct i40iw_qp *iwqp;
+ struct i40iw_device *iwdev;
+ struct i40iw_cm_node *cm_node;
+ struct i40iw_cm_info cm_info;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ int apbvt_set = 0;
+ enum i40iw_status_code status;
+
+ ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ iwqp = to_iwqp(ibqp);
+ if (!iwqp)
+ return -EINVAL;
+ iwdev = to_iwdev(iwqp->ibqp.device);
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+ if (!(laddr->sin_port) || !(raddr->sin_port))
+ return -EINVAL;
+
+ iwqp->active_conn = 1;
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = iwqp;
+
+ /* set up the connection params for the node */
+ if (cm_id->remote_addr.ss_family == AF_INET) {
+ cm_info.ipv4 = true;
+ memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
+ memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_port = ntohs(raddr->sin_port);
+ cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
+ } else {
+ cm_info.ipv4 = false;
+ i40iw_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ i40iw_copy_ip_ntohl(cm_info.rem_addr,
+ raddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ cm_info.rem_port = ntohs(raddr6->sin6_port);
+ i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
+ }
+ cm_info.cm_id = cm_id;
+ if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+ (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+ raddr6->sin6_addr.in6_u.u6_addr32,
+ sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+ status = i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_ADD,
+ NULL,
+ true);
+ if (status)
+ return -EINVAL;
+ }
+ status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
+ if (status) {
+ i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ return -EINVAL;
+ }
+
+ apbvt_set = 1;
+ cm_id->add_ref(cm_id);
+ cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
+ conn_param->private_data_len,
+ (void *)conn_param->private_data,
+ &cm_info);
+ if (!cm_node) {
+ i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+
+ if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
+ cm_info.loc_port))
+ i40iw_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ I40IW_MANAGE_APBVT_DEL);
+ cm_id->rem_ref(cm_id);
+ iwdev->cm_core.stats_connect_errs++;
+ return -ENOMEM;
+ }
+
+ i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+ if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+ !cm_node->ord_size)
+ cm_node->ord_size = 1;
+
+ cm_node->apbvt_set = apbvt_set;
+ cm_node->qhash_set = true;
+ iwqp->cm_node = cm_node;
+ cm_node->iwqp = iwqp;
+ iwqp->cm_id = cm_id;
+ i40iw_add_ref(&iwqp->ibqp);
+ return 0;
+}
+
+/**
+ * i40iw_create_listen - registered call creating listener
+ * @cm_id: cm information for passive connection
+ * @backlog: to max accept pending count
+ */
+int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct i40iw_device *iwdev;
+ struct i40iw_cm_listener *cm_listen_node;
+ struct i40iw_cm_info cm_info;
+ enum i40iw_status_code ret;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in6 *laddr6;
+ bool wildcard = false;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ memset(&cm_info, 0, sizeof(cm_info));
+ if (laddr->sin_family == AF_INET) {
+ cm_info.ipv4 = true;
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+
+ if (laddr->sin_addr.s_addr != INADDR_ANY)
+ cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
+ else
+ wildcard = true;
+
+ } else {
+ cm_info.ipv4 = false;
+ i40iw_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY)
+ i40iw_netdev_vlan_ipv6(cm_info.loc_addr,
+ &cm_info.vlan_id,
+ NULL);
+ else
+ wildcard = true;
+ }
+ cm_info.backlog = backlog;
+ cm_info.cm_id = cm_id;
+
+ cm_listen_node = i40iw_make_listen_node(&iwdev->cm_core, iwdev, &cm_info);
+ if (!cm_listen_node) {
+ i40iw_pr_err("cm_listen_node == NULL\n");
+ return -ENOMEM;
+ }
+
+ cm_id->provider_data = cm_listen_node;
+
+ if (!cm_listen_node->reused_node) {
+ if (wildcard) {
+ if (cm_info.ipv4)
+ ret = i40iw_add_mqh_4(iwdev,
+ &cm_info,
+ cm_listen_node);
+ else
+ ret = i40iw_add_mqh_6(iwdev,
+ &cm_info,
+ cm_listen_node);
+ if (ret)
+ goto error;
+
+ ret = i40iw_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ I40IW_MANAGE_APBVT_ADD);
+
+ if (ret)
+ goto error;
+ } else {
+ ret = i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_ADD,
+ NULL,
+ true);
+ if (ret)
+ goto error;
+ cm_listen_node->qhash_set = true;
+ ret = i40iw_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ I40IW_MANAGE_APBVT_ADD);
+ if (ret)
+ goto error;
+ }
+ }
+ cm_id->add_ref(cm_id);
+ cm_listen_node->cm_core->stats_listen_created++;
+ return 0;
+ error:
+ i40iw_cm_del_listen(&iwdev->cm_core, (void *)cm_listen_node, false);
+ return -EINVAL;
+}
+
+/**
+ * i40iw_destroy_listen - registered call to destroy listener
+ * @cm_id: cm information for passive connection
+ */
+int i40iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+ struct i40iw_device *iwdev;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (cm_id->provider_data)
+ i40iw_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true);
+ else
+ i40iw_pr_err("cm_id->provider_data was NULL\n");
+
+ cm_id->rem_ref(cm_id);
+
+ return 0;
+}
+
+/**
+ * i40iw_cm_event_connected - handle connected active node
+ * @event: the info for cm_node of connection
+ */
+static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
+{
+ struct i40iw_qp *iwqp;
+ struct i40iw_device *iwdev;
+ struct i40iw_cm_node *cm_node;
+ struct i40iw_sc_dev *dev;
+ struct ib_qp_attr attr;
+ struct iw_cm_id *cm_id;
+ int status;
+ bool read0;
+
+ cm_node = event->cm_node;
+ cm_id = cm_node->cm_id;
+ iwqp = (struct i40iw_qp *)cm_id->provider_data;
+ iwdev = to_iwdev(iwqp->ibqp.device);
+ dev = &iwdev->sc_dev;
+
+ if (iwqp->destroyed) {
+ status = -ETIMEDOUT;
+ goto error;
+ }
+ i40iw_cm_init_tsa_conn(iwqp, cm_node);
+ read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+ dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
+ if (iwqp->page)
+ kunmap(iwqp->page);
+ status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
+ if (status)
+ i40iw_pr_err("send cm event\n");
+
+ memset(&attr, 0, sizeof(attr));
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+ cm_node->accelerated = 1;
+ if (cm_node->accept_pend) {
+ if (!cm_node->listener)
+ i40iw_pr_err("listener is null for passive node\n");
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+ return;
+
+error:
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ i40iw_send_cm_event(event->cm_node,
+ cm_id,
+ IW_CM_EVENT_CONNECT_REPLY,
+ status);
+ cm_id->rem_ref(cm_id);
+ i40iw_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * i40iw_cm_event_reset - handle reset
+ * @event: the info for cm_node of connection
+ */
+static void i40iw_cm_event_reset(struct i40iw_cm_event *event)
+{
+ struct i40iw_cm_node *cm_node = event->cm_node;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct i40iw_qp *iwqp;
+
+ if (!cm_id)
+ return;
+
+ iwqp = cm_id->provider_data;
+ if (!iwqp)
+ return;
+
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "reset event %p - cm_id = %p\n",
+ event->cm_node, cm_id);
+ iwqp->cm_id = NULL;
+
+ i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET);
+ i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
+}
+
+/**
+ * i40iw_cm_event_handler - worker thread callback to send event to cm upper layer
+ * @work: pointer of cm event info.
+ */
+static void i40iw_cm_event_handler(struct work_struct *work)
+{
+ struct i40iw_cm_event *event = container_of(work,
+ struct i40iw_cm_event,
+ event_work);
+ struct i40iw_cm_node *cm_node;
+
+ if (!event || !event->cm_node || !event->cm_node->cm_core)
+ return;
+
+ cm_node = event->cm_node;
+
+ switch (event->type) {
+ case I40IW_CM_EVENT_MPA_REQ:
+ i40iw_send_cm_event(cm_node,
+ cm_node->cm_id,
+ IW_CM_EVENT_CONNECT_REQUEST,
+ 0);
+ break;
+ case I40IW_CM_EVENT_RESET:
+ i40iw_cm_event_reset(event);
+ break;
+ case I40IW_CM_EVENT_CONNECTED:
+ if (!event->cm_node->cm_id ||
+ (event->cm_node->state != I40IW_CM_STATE_OFFLOADED))
+ break;
+ i40iw_cm_event_connected(event);
+ break;
+ case I40IW_CM_EVENT_MPA_REJECT:
+ if (!event->cm_node->cm_id ||
+ (cm_node->state == I40IW_CM_STATE_OFFLOADED))
+ break;
+ i40iw_send_cm_event(cm_node,
+ cm_node->cm_id,
+ IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNREFUSED);
+ break;
+ case I40IW_CM_EVENT_ABORTED:
+ if (!event->cm_node->cm_id ||
+ (event->cm_node->state == I40IW_CM_STATE_OFFLOADED))
+ break;
+ i40iw_event_connect_error(event);
+ break;
+ default:
+ i40iw_pr_err("event type = %d\n", event->type);
+ break;
+ }
+
+ event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
+ i40iw_rem_ref_cm_node(event->cm_node);
+ kfree(event);
+}
+
+/**
+ * i40iw_cm_post_event - queue event request for worker thread
+ * @event: cm node's info for up event call
+ */
+static void i40iw_cm_post_event(struct i40iw_cm_event *event)
+{
+ atomic_inc(&event->cm_node->ref_count);
+ event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
+ INIT_WORK(&event->event_work, i40iw_cm_event_handler);
+
+ queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
new file mode 100644
index 000000000000..5f8ceb4a8e84
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -0,0 +1,456 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_CM_H
+#define I40IW_CM_H
+
+#define QUEUE_EVENTS
+
+#define I40IW_MANAGE_APBVT_DEL 0
+#define I40IW_MANAGE_APBVT_ADD 1
+
+#define I40IW_MPA_REQUEST_ACCEPT 1
+#define I40IW_MPA_REQUEST_REJECT 2
+
+/* IETF MPA -- defines, enums, structs */
+#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VERSION 1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE 20
+#define IETF_RTR_MSG_SIZE 4
+#define IETF_MPA_V2_FLAG 0x10
+#define SNDMARKER_SEQNMASK 0x000001FF
+
+#define I40IW_MAX_IETF_SIZE 32
+
+#define MPA_ZERO_PAD_LEN 4
+
+/* IETF RTR MSG Fields */
+#define IETF_PEER_TO_PEER 0x8000
+#define IETF_FLPDU_ZERO_LEN 0x4000
+#define IETF_RDMA0_WRITE 0x8000
+#define IETF_RDMA0_READ 0x4000
+#define IETF_NO_IRD_ORD 0x3FFF
+
+/* HW-supported IRD sizes*/
+#define I40IW_HW_IRD_SETTING_2 2
+#define I40IW_HW_IRD_SETTING_4 4
+#define I40IW_HW_IRD_SETTING_8 8
+#define I40IW_HW_IRD_SETTING_16 16
+#define I40IW_HW_IRD_SETTING_32 32
+#define I40IW_HW_IRD_SETTING_64 64
+
+enum ietf_mpa_flags {
+ IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
+ IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */
+ IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
+};
+
+struct ietf_mpa_v1 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ u8 priv_data[0];
+};
+
+#define ietf_mpa_req_resp_frame ietf_mpa_frame
+
+struct ietf_rtr_msg {
+ __be16 ctrl_ird;
+ __be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ struct ietf_rtr_msg rtr_msg;
+ u8 priv_data[0];
+};
+
+struct i40iw_cm_node;
+enum i40iw_timer_type {
+ I40IW_TIMER_TYPE_SEND,
+ I40IW_TIMER_TYPE_RECV,
+ I40IW_TIMER_NODE_CLEANUP,
+ I40IW_TIMER_TYPE_CLOSE,
+};
+
+#define I40IW_PASSIVE_STATE_INDICATED 0
+#define I40IW_DO_NOT_SEND_RESET_EVENT 1
+#define I40IW_SEND_RESET_EVENT 2
+
+#define MAX_I40IW_IFS 4
+
+#define SET_ACK 0x1
+#define SET_SYN 0x2
+#define SET_FIN 0x4
+#define SET_RST 0x8
+
+#define TCP_OPTIONS_PADDING 3
+
+struct option_base {
+ u8 optionnum;
+ u8 length;
+};
+
+enum option_numbers {
+ OPTION_NUMBER_END,
+ OPTION_NUMBER_NONE,
+ OPTION_NUMBER_MSS,
+ OPTION_NUMBER_WINDOW_SCALE,
+ OPTION_NUMBER_SACK_PERM,
+ OPTION_NUMBER_SACK,
+ OPTION_NUMBER_WRITE0 = 0xbc
+};
+
+struct option_mss {
+ u8 optionnum;
+ u8 length;
+ __be16 mss;
+};
+
+struct option_windowscale {
+ u8 optionnum;
+ u8 length;
+ u8 shiftcount;
+};
+
+union all_known_options {
+ char as_end;
+ struct option_base as_base;
+ struct option_mss as_mss;
+ struct option_windowscale as_windowscale;
+};
+
+struct i40iw_timer_entry {
+ struct list_head list;
+ unsigned long timetosend; /* jiffies */
+ struct i40iw_puda_buf *sqbuf;
+ u32 type;
+ u32 retrycount;
+ u32 retranscount;
+ u32 context;
+ u32 send_retrans;
+ int close_when_complete;
+};
+
+#define I40IW_DEFAULT_RETRYS 64
+#define I40IW_DEFAULT_RETRANS 8
+#define I40IW_DEFAULT_TTL 0x40
+#define I40IW_DEFAULT_RTT_VAR 0x6
+#define I40IW_DEFAULT_SS_THRESH 0x3FFFFFFF
+#define I40IW_DEFAULT_REXMIT_THRESH 8
+
+#define I40IW_RETRY_TIMEOUT HZ
+#define I40IW_SHORT_TIME 10
+#define I40IW_LONG_TIME (2 * HZ)
+#define I40IW_MAX_TIMEOUT ((unsigned long)(12 * HZ))
+
+#define I40IW_CM_HASHTABLE_SIZE 1024
+#define I40IW_CM_TCP_TIMER_INTERVAL 3000
+#define I40IW_CM_DEFAULT_MTU 1540
+#define I40IW_CM_DEFAULT_FRAME_CNT 10
+#define I40IW_CM_THREAD_STACK_SIZE 256
+#define I40IW_CM_DEFAULT_RCV_WND 64240
+#define I40IW_CM_DEFAULT_RCV_WND_SCALED 0x3fffc
+#define I40IW_CM_DEFAULT_RCV_WND_SCALE 2
+#define I40IW_CM_DEFAULT_FREE_PKTS 0x000A
+#define I40IW_CM_FREE_PKT_LO_WATERMARK 2
+
+#define I40IW_CM_DEFAULT_MSS 536
+
+#define I40IW_CM_DEF_SEQ 0x159bf75f
+#define I40IW_CM_DEF_LOCAL_ID 0x3b47
+
+#define I40IW_CM_DEF_SEQ2 0x18ed5740
+#define I40IW_CM_DEF_LOCAL_ID2 0xb807
+#define MAX_CM_BUFFER (I40IW_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
+
+typedef u32 i40iw_addr_t;
+
+#define i40iw_cm_tsa_context i40iw_qp_context
+
+struct i40iw_qp;
+
+/* cm node transition states */
+enum i40iw_cm_node_state {
+ I40IW_CM_STATE_UNKNOWN,
+ I40IW_CM_STATE_INITED,
+ I40IW_CM_STATE_LISTENING,
+ I40IW_CM_STATE_SYN_RCVD,
+ I40IW_CM_STATE_SYN_SENT,
+ I40IW_CM_STATE_ONE_SIDE_ESTABLISHED,
+ I40IW_CM_STATE_ESTABLISHED,
+ I40IW_CM_STATE_ACCEPTING,
+ I40IW_CM_STATE_MPAREQ_SENT,
+ I40IW_CM_STATE_MPAREQ_RCVD,
+ I40IW_CM_STATE_MPAREJ_RCVD,
+ I40IW_CM_STATE_OFFLOADED,
+ I40IW_CM_STATE_FIN_WAIT1,
+ I40IW_CM_STATE_FIN_WAIT2,
+ I40IW_CM_STATE_CLOSE_WAIT,
+ I40IW_CM_STATE_TIME_WAIT,
+ I40IW_CM_STATE_LAST_ACK,
+ I40IW_CM_STATE_CLOSING,
+ I40IW_CM_STATE_LISTENER_DESTROYED,
+ I40IW_CM_STATE_CLOSED
+};
+
+enum mpa_frame_version {
+ IETF_MPA_V1 = 1,
+ IETF_MPA_V2 = 2
+};
+
+enum mpa_frame_key {
+ MPA_KEY_REQUEST,
+ MPA_KEY_REPLY
+};
+
+enum send_rdma0 {
+ SEND_RDMA_READ_ZERO = 1,
+ SEND_RDMA_WRITE_ZERO = 2
+};
+
+enum i40iw_tcpip_pkt_type {
+ I40IW_PKT_TYPE_UNKNOWN,
+ I40IW_PKT_TYPE_SYN,
+ I40IW_PKT_TYPE_SYNACK,
+ I40IW_PKT_TYPE_ACK,
+ I40IW_PKT_TYPE_FIN,
+ I40IW_PKT_TYPE_RST
+};
+
+/* CM context params */
+struct i40iw_cm_tcp_context {
+ u8 client;
+
+ u32 loc_seq_num;
+ u32 loc_ack_num;
+ u32 rem_ack_num;
+ u32 rcv_nxt;
+
+ u32 loc_id;
+ u32 rem_id;
+
+ u32 snd_wnd;
+ u32 max_snd_wnd;
+
+ u32 rcv_wnd;
+ u32 mss;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+
+ struct timeval sent_ts;
+};
+
+enum i40iw_cm_listener_state {
+ I40IW_CM_LISTENER_PASSIVE_STATE = 1,
+ I40IW_CM_LISTENER_ACTIVE_STATE = 2,
+ I40IW_CM_LISTENER_EITHER_STATE = 3
+};
+
+struct i40iw_cm_listener {
+ struct list_head list;
+ struct i40iw_cm_core *cm_core;
+ u8 loc_mac[ETH_ALEN];
+ u32 loc_addr[4];
+ u16 loc_port;
+ u32 map_loc_addr[4];
+ u16 map_loc_port;
+ struct iw_cm_id *cm_id;
+ atomic_t ref_count;
+ struct i40iw_device *iwdev;
+ atomic_t pend_accepts_cnt;
+ int backlog;
+ enum i40iw_cm_listener_state listener_state;
+ u32 reused_node;
+ u8 user_pri;
+ u16 vlan_id;
+ bool qhash_set;
+ bool ipv4;
+ struct list_head child_listen_list;
+
+};
+
+struct i40iw_kmem_info {
+ void *addr;
+ u32 size;
+};
+
+/* per connection node and node state information */
+struct i40iw_cm_node {
+ u32 loc_addr[4], rem_addr[4];
+ u16 loc_port, rem_port;
+ u32 map_loc_addr[4], map_rem_addr[4];
+ u16 map_loc_port, map_rem_port;
+ u16 vlan_id;
+ enum i40iw_cm_node_state state;
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+ atomic_t ref_count;
+ struct i40iw_qp *iwqp;
+ struct i40iw_device *iwdev;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_cm_tcp_context tcp_cntxt;
+ struct i40iw_cm_core *cm_core;
+ struct i40iw_cm_node *loopbackpartner;
+ struct i40iw_timer_entry *send_entry;
+ struct i40iw_timer_entry *close_entry;
+ spinlock_t retrans_list_lock; /* cm transmit packet */
+ enum send_rdma0 send_rdma0_op;
+ u16 ird_size;
+ u16 ord_size;
+ u16 mpav2_ird_ord;
+ struct iw_cm_id *cm_id;
+ struct list_head list;
+ int accelerated;
+ struct i40iw_cm_listener *listener;
+ int apbvt_set;
+ int accept_pend;
+ struct list_head timer_entry;
+ struct list_head reset_entry;
+ atomic_t passive_state;
+ bool qhash_set;
+ u8 user_pri;
+ bool ipv4;
+ bool snd_mark_en;
+ u16 lsmm_size;
+ enum mpa_frame_version mpa_frame_rev;
+ struct i40iw_kmem_info pdata;
+ union {
+ struct ietf_mpa_v1 mpa_frame;
+ struct ietf_mpa_v2 mpa_v2_frame;
+ };
+
+ u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
+ struct i40iw_kmem_info mpa_hdr;
+};
+
+/* structure for client or CM to fill when making CM api calls. */
+/* - only need to set relevant data, based on op. */
+struct i40iw_cm_info {
+ struct iw_cm_id *cm_id;
+ u16 loc_port;
+ u16 rem_port;
+ u32 loc_addr[4];
+ u32 rem_addr[4];
+ u16 map_loc_port;
+ u16 map_rem_port;
+ u32 map_loc_addr[4];
+ u32 map_rem_addr[4];
+ u16 vlan_id;
+ int backlog;
+ u16 user_pri;
+ bool ipv4;
+};
+
+/* CM event codes */
+enum i40iw_cm_event_type {
+ I40IW_CM_EVENT_UNKNOWN,
+ I40IW_CM_EVENT_ESTABLISHED,
+ I40IW_CM_EVENT_MPA_REQ,
+ I40IW_CM_EVENT_MPA_CONNECT,
+ I40IW_CM_EVENT_MPA_ACCEPT,
+ I40IW_CM_EVENT_MPA_REJECT,
+ I40IW_CM_EVENT_MPA_ESTABLISHED,
+ I40IW_CM_EVENT_CONNECTED,
+ I40IW_CM_EVENT_RESET,
+ I40IW_CM_EVENT_ABORTED
+};
+
+/* event to post to CM event handler */
+struct i40iw_cm_event {
+ enum i40iw_cm_event_type type;
+ struct i40iw_cm_info cm_info;
+ struct work_struct event_work;
+ struct i40iw_cm_node *cm_node;
+};
+
+struct i40iw_cm_core {
+ struct i40iw_device *iwdev;
+ struct i40iw_sc_dev *dev;
+
+ struct list_head listen_nodes;
+ struct list_head connected_nodes;
+
+ struct timer_list tcp_timer;
+
+ struct workqueue_struct *event_wq;
+ struct workqueue_struct *disconn_wq;
+
+ spinlock_t ht_lock; /* manage hash table */
+ spinlock_t listen_list_lock; /* listen list */
+
+ u64 stats_nodes_created;
+ u64 stats_nodes_destroyed;
+ u64 stats_listen_created;
+ u64 stats_listen_destroyed;
+ u64 stats_listen_nodes_created;
+ u64 stats_listen_nodes_destroyed;
+ u64 stats_loopbacks;
+ u64 stats_accepts;
+ u64 stats_rejects;
+ u64 stats_connect_errs;
+ u64 stats_passive_errs;
+ u64 stats_pkt_retrans;
+ u64 stats_backlog_drops;
+};
+
+int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
+ struct i40iw_puda_buf *sqbuf,
+ enum i40iw_timer_type type,
+ int send_retrans,
+ int close_when_complete);
+
+int i40iw_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
+int i40iw_reject(struct iw_cm_id *, const void *, u8);
+int i40iw_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
+int i40iw_create_listen(struct iw_cm_id *, int);
+int i40iw_destroy_listen(struct iw_cm_id *);
+
+int i40iw_cm_start(struct i40iw_device *);
+int i40iw_cm_stop(struct i40iw_device *);
+
+int i40iw_arp_table(struct i40iw_device *iwdev,
+ u32 *ip_addr,
+ bool ipv4,
+ u8 *mac_addr,
+ u32 action);
+
+#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
new file mode 100644
index 000000000000..f05802bf6ca0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -0,0 +1,4743 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * i40iw_insert_wqe_hdr - write wqe header
+ * @wqe: cqp wqe for header
+ * @header: header for the cqp wqe
+ */
+static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header)
+{
+ wmb(); /* make sure WQE is populated before polarity is set */
+ set_64bit_val(wqe, 24, header);
+}
+
+/**
+ * i40iw_get_cqp_reg_info - get head and tail for cqp using registers
+ * @cqp: struct for cqp hw
+ * @val: cqp tail register value
+ * @tail:wqtail register value
+ * @error: cqp processing err
+ */
+static inline void i40iw_get_cqp_reg_info(struct i40iw_sc_cqp *cqp,
+ u32 *val,
+ u32 *tail,
+ u32 *error)
+{
+ if (cqp->dev->is_pf) {
+ *val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPTAIL);
+ *tail = RS_32(*val, I40E_PFPE_CQPTAIL_WQTAIL);
+ *error = RS_32(*val, I40E_PFPE_CQPTAIL_CQP_OP_ERR);
+ } else {
+ *val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPTAIL1);
+ *tail = RS_32(*val, I40E_VFPE_CQPTAIL_WQTAIL);
+ *error = RS_32(*val, I40E_VFPE_CQPTAIL_CQP_OP_ERR);
+ }
+}
+
+/**
+ * i40iw_cqp_poll_registers - poll cqp registers
+ * @cqp: struct for cqp hw
+ * @tail:wqtail register value
+ * @count: how many times to try for completion
+ */
+static enum i40iw_status_code i40iw_cqp_poll_registers(
+ struct i40iw_sc_cqp *cqp,
+ u32 tail,
+ u32 count)
+{
+ u32 i = 0;
+ u32 newtail, error, val;
+
+ while (i < count) {
+ i++;
+ i40iw_get_cqp_reg_info(cqp, &val, &newtail, &error);
+ if (error) {
+ error = (cqp->dev->is_pf) ?
+ i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES) :
+ i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
+ return I40IW_ERR_CQP_COMPL_ERROR;
+ }
+ if (newtail != tail) {
+ /* SUCCESS */
+ I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+ return 0;
+ }
+ udelay(I40IW_SLEEP_COUNT);
+ }
+ return I40IW_ERR_TIMEOUT;
+}
+
+/**
+ * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
+ * @buf: ptr to fpm commit buffer
+ * @info: ptr to i40iw_hmc_obj_info struct
+ *
+ * parses fpm commit info and copy base value
+ * of hmc objects in hmc_info
+ */
+static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
+ u64 *buf,
+ struct i40iw_hmc_obj_info *info)
+{
+ u64 temp;
+ u32 i, j;
+ u32 low;
+
+ /* copy base values in obj_info */
+ for (i = I40IW_HMC_IW_QP, j = 0;
+ i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
+ get_64bit_val(buf, j, &temp);
+ info[i].base = RS_64_1(temp, 32) * 512;
+ low = (u32)(temp);
+ if (low)
+ info[i].cnt = low;
+ }
+ return 0;
+}
+
+/**
+ * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer
+ * @buf: ptr to fpm query buffer
+ * @info: ptr to i40iw_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ *
+ * parses fpm query buffer and copy max_cnt and
+ * size value of hmc objects in hmc_info
+ */
+static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf(
+ u64 *buf,
+ struct i40iw_hmc_info *hmc_info,
+ struct i40iw_hmc_fpm_misc *hmc_fpm_misc)
+{
+ u64 temp;
+ struct i40iw_hmc_obj_info *obj_info;
+ u32 i, j, size;
+ u16 max_pe_sds;
+
+ obj_info = hmc_info->hmc_obj;
+
+ get_64bit_val(buf, 0, &temp);
+ hmc_info->first_sd_index = (u16)RS_64(temp, I40IW_QUERY_FPM_FIRST_PE_SD_INDEX);
+ max_pe_sds = (u16)RS_64(temp, I40IW_QUERY_FPM_MAX_PE_SDS);
+
+ /* Reduce SD count for VFs by 1 to account for PBLE backing page rounding */
+ if (hmc_info->hmc_fn_id >= I40IW_FIRST_VF_FPM_ID)
+ max_pe_sds--;
+ hmc_fpm_misc->max_sds = max_pe_sds;
+ hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
+
+ for (i = I40IW_HMC_IW_QP, j = 8;
+ i <= I40IW_HMC_IW_ARP; i++, j += 8) {
+ get_64bit_val(buf, j, &temp);
+ if (i == I40IW_HMC_IW_QP)
+ obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS);
+ else if (i == I40IW_HMC_IW_CQ)
+ obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS);
+ else
+ obj_info[i].max_cnt = (u32)temp;
+
+ size = (u32)RS_64_1(temp, 32);
+ obj_info[i].size = ((u64)1 << size);
+ }
+ for (i = I40IW_HMC_IW_MR, j = 48;
+ i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
+ get_64bit_val(buf, j, &temp);
+ obj_info[i].max_cnt = (u32)temp;
+ size = (u32)RS_64_1(temp, 32);
+ obj_info[i].size = LS_64_1(1, size);
+ }
+
+ get_64bit_val(buf, 120, &temp);
+ hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS);
+ get_64bit_val(buf, 120, &temp);
+ hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER);
+ get_64bit_val(buf, 120, &temp);
+ hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET);
+ get_64bit_val(buf, 64, &temp);
+ hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE);
+ if (!hmc_fpm_misc->xf_block_size)
+ return I40IW_ERR_INVALID_SIZE;
+ get_64bit_val(buf, 80, &temp);
+ hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE);
+ if (!hmc_fpm_misc->q1_block_size)
+ return I40IW_ERR_INVALID_SIZE;
+ return 0;
+}
+
+/**
+ * i40iw_sc_pd_init - initialize sc pd struct
+ * @dev: sc device struct
+ * @pd: sc pd ptr
+ * @pd_id: pd_id for allocated pd
+ */
+static void i40iw_sc_pd_init(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_pd *pd,
+ u16 pd_id)
+{
+ pd->size = sizeof(*pd);
+ pd->pd_id = pd_id;
+ pd->dev = dev;
+}
+
+/**
+ * i40iw_get_encoded_wqe_size - given wq size, returns hardware encoded size
+ * @wqsize: size of the wq (sq, rq, srq) to encoded_size
+ * @cqpsq: encoded size for sq for cqp as its encoded size is 1+ other wq's
+ */
+u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq)
+{
+ u8 encoded_size = 0;
+
+ /* cqp sq's hw coded value starts from 1 for size of 4
+ * while it starts from 0 for qp' wq's.
+ */
+ if (cqpsq)
+ encoded_size = 1;
+ wqsize >>= 2;
+ while (wqsize >>= 1)
+ encoded_size++;
+ return encoded_size;
+}
+
+/**
+ * i40iw_sc_cqp_init - Initialize buffers for a control Queue Pair
+ * @cqp: IWARP control queue pair pointer
+ * @info: IWARP control queue pair init info pointer
+ *
+ * Initializes the object and context buffers for a control Queue Pair.
+ */
+static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
+ struct i40iw_cqp_init_info *info)
+{
+ u8 hw_sq_size;
+
+ if ((info->sq_size > I40IW_CQP_SW_SQSIZE_2048) ||
+ (info->sq_size < I40IW_CQP_SW_SQSIZE_4) ||
+ ((info->sq_size & (info->sq_size - 1))))
+ return I40IW_ERR_INVALID_SIZE;
+
+ hw_sq_size = i40iw_get_encoded_wqe_size(info->sq_size, true);
+ cqp->size = sizeof(*cqp);
+ cqp->sq_size = info->sq_size;
+ cqp->hw_sq_size = hw_sq_size;
+ cqp->sq_base = info->sq;
+ cqp->host_ctx = info->host_ctx;
+ cqp->sq_pa = info->sq_pa;
+ cqp->host_ctx_pa = info->host_ctx_pa;
+ cqp->dev = info->dev;
+ cqp->struct_ver = info->struct_ver;
+ cqp->scratch_array = info->scratch_array;
+ cqp->polarity = 0;
+ cqp->en_datacenter_tcp = info->en_datacenter_tcp;
+ cqp->enabled_vf_count = info->enabled_vf_count;
+ cqp->hmc_profile = info->hmc_profile;
+ info->dev->cqp = cqp;
+
+ I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
+ i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
+ "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
+ __func__, cqp->sq_size, cqp->hw_sq_size,
+ cqp->sq_base, cqp->sq_pa, cqp, cqp->polarity);
+ return 0;
+}
+
+/**
+ * i40iw_sc_cqp_create - create cqp during bringup
+ * @cqp: struct for cqp hw
+ * @disable_pfpdus: if pfpdu to be disabled
+ * @maj_err: If error, major err number
+ * @min_err: If error, minor err number
+ */
+static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
+ bool disable_pfpdus,
+ u16 *maj_err,
+ u16 *min_err)
+{
+ u64 temp;
+ u32 cnt = 0, p1, p2, val = 0, err_code;
+ enum i40iw_status_code ret_code;
+
+ ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
+ &cqp->sdbuf,
+ 128,
+ I40IW_SD_BUF_ALIGNMENT);
+
+ if (ret_code)
+ goto exit;
+
+ temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
+ LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
+
+ if (disable_pfpdus)
+ temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS);
+
+ set_64bit_val(cqp->host_ctx, 0, temp);
+ set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
+ temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
+ LS_64(cqp->hmc_profile, I40IW_CQPHC_HMC_PROFILE);
+ set_64bit_val(cqp->host_ctx, 16, temp);
+ set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
+ set_64bit_val(cqp->host_ctx, 32, 0);
+ set_64bit_val(cqp->host_ctx, 40, 0);
+ set_64bit_val(cqp->host_ctx, 48, 0);
+ set_64bit_val(cqp->host_ctx, 56, 0);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQP_HOST_CTX",
+ cqp->host_ctx, I40IW_CQP_CTX_SIZE * 8);
+
+ p1 = RS_32_1(cqp->host_ctx_pa, 32);
+ p2 = (u32)cqp->host_ctx_pa;
+
+ if (cqp->dev->is_pf) {
+ i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, p1);
+ i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, p2);
+ } else {
+ i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, p1);
+ i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, p2);
+ }
+ do {
+ if (cnt++ > I40IW_DONE_COUNT) {
+ i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
+ ret_code = I40IW_ERR_TIMEOUT;
+ /*
+ * read PFPE_CQPERRORCODES register to get the minor
+ * and major error code
+ */
+ if (cqp->dev->is_pf)
+ err_code = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES);
+ else
+ err_code = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
+ *min_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE);
+ *maj_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE);
+ goto exit;
+ }
+ udelay(I40IW_SLEEP_COUNT);
+ if (cqp->dev->is_pf)
+ val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CCQPSTATUS);
+ else
+ val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CCQPSTATUS1);
+ } while (!val);
+
+exit:
+ if (!ret_code)
+ cqp->process_cqp_sds = i40iw_update_sds_noccq;
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_cqp_post_sq - post of cqp's sq
+ * @cqp: struct for cqp hw
+ */
+void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp)
+{
+ if (cqp->dev->is_pf)
+ i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
+ else
+ i40iw_wr32(cqp->dev->hw, I40E_VFPE_CQPDB1, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
+
+ i40iw_debug(cqp->dev,
+ I40IW_DEBUG_WQE,
+ "%s: HEAD_TAIL[%04d,%04d,%04d]\n",
+ __func__,
+ cqp->sq_ring.head,
+ cqp->sq_ring.tail,
+ cqp->sq_ring.size);
+}
+
+/**
+ * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
+ * @cqp: struct for cqp hw
+ * @wqe_idx: we index of cqp ring
+ */
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
+{
+ u64 *wqe = NULL;
+ u32 wqe_idx;
+ enum i40iw_status_code ret_code;
+
+ if (I40IW_RING_FULL_ERR(cqp->sq_ring)) {
+ i40iw_debug(cqp->dev,
+ I40IW_DEBUG_WQE,
+ "%s: ring is full head %x tail %x size %x\n",
+ __func__,
+ cqp->sq_ring.head,
+ cqp->sq_ring.tail,
+ cqp->sq_ring.size);
+ return NULL;
+ }
+ I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+ if (!wqe_idx)
+ cqp->polarity = !cqp->polarity;
+
+ wqe = cqp->sq_base[wqe_idx].elem;
+ cqp->scratch_array[wqe_idx] = scratch;
+ I40IW_CQP_INIT_WQE(wqe);
+
+ return wqe;
+}
+
+/**
+ * i40iw_sc_cqp_destroy - destroy cqp during close
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_cqp_destroy(struct i40iw_sc_cqp *cqp)
+{
+ u32 cnt = 0, val = 1;
+ enum i40iw_status_code ret_code = 0;
+ u32 cqpstat_addr;
+
+ if (cqp->dev->is_pf) {
+ i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, 0);
+ i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, 0);
+ cqpstat_addr = I40E_PFPE_CCQPSTATUS;
+ } else {
+ i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, 0);
+ i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, 0);
+ cqpstat_addr = I40E_VFPE_CCQPSTATUS1;
+ }
+ do {
+ if (cnt++ > I40IW_DONE_COUNT) {
+ ret_code = I40IW_ERR_TIMEOUT;
+ break;
+ }
+ udelay(I40IW_SLEEP_COUNT);
+ val = i40iw_rd32(cqp->dev->hw, cqpstat_addr);
+ } while (val);
+
+ i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_ccq_arm - enable intr for control cq
+ * @ccq: ccq sc struct
+ */
+static void i40iw_sc_ccq_arm(struct i40iw_sc_cq *ccq)
+{
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se;
+ u8 arm_seq_num;
+
+ /* write to cq doorbell shadow area */
+ /* arm next se should always be zero */
+ get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
+
+ sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
+ arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
+
+ arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
+ arm_seq_num++;
+
+ temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
+ LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
+ LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
+ LS_64(1, I40IW_CQ_DBSA_ARM_NEXT);
+
+ set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
+
+ wmb(); /* make sure shadow area is updated before arming */
+
+ if (ccq->dev->is_pf)
+ i40iw_wr32(ccq->dev->hw, I40E_PFPE_CQARM, ccq->cq_uk.cq_id);
+ else
+ i40iw_wr32(ccq->dev->hw, I40E_VFPE_CQARM1, ccq->cq_uk.cq_id);
+}
+
+/**
+ * i40iw_sc_ccq_get_cqe_info - get ccq's cq entry
+ * @ccq: ccq sc struct
+ * @info: completion q entry to return
+ */
+static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
+ struct i40iw_sc_cq *ccq,
+ struct i40iw_ccq_cqe_info *info)
+{
+ u64 qp_ctx, temp, temp1;
+ u64 *cqe;
+ struct i40iw_sc_cqp *cqp;
+ u32 wqe_idx;
+ u8 polarity;
+ enum i40iw_status_code ret_code = 0;
+
+ if (ccq->cq_uk.avoid_mem_cflct)
+ cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(&ccq->cq_uk);
+ else
+ cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&ccq->cq_uk);
+
+ get_64bit_val(cqe, 24, &temp);
+ polarity = (u8)RS_64(temp, I40IW_CQ_VALID);
+ if (polarity != ccq->cq_uk.polarity)
+ return I40IW_ERR_QUEUE_EMPTY;
+
+ get_64bit_val(cqe, 8, &qp_ctx);
+ cqp = (struct i40iw_sc_cqp *)(unsigned long)qp_ctx;
+ info->error = (bool)RS_64(temp, I40IW_CQ_ERROR);
+ info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
+ if (info->error) {
+ info->maj_err_code = (u16)RS_64(temp, I40IW_CQ_MAJERR);
+ info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
+ }
+ wqe_idx = (u32)RS_64(temp, I40IW_CQ_WQEIDX);
+ info->scratch = cqp->scratch_array[wqe_idx];
+
+ get_64bit_val(cqe, 16, &temp1);
+ info->op_ret_val = (u32)RS_64(temp1, I40IW_CCQ_OPRETVAL);
+ get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
+ info->op_code = (u8)RS_64(temp1, I40IW_CQPSQ_OPCODE);
+ info->cqp = cqp;
+
+ /* move the head for cq */
+ I40IW_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
+ if (I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring) == 0)
+ ccq->cq_uk.polarity ^= 1;
+
+ /* update cq tail in cq shadow memory also */
+ I40IW_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
+ set_64bit_val(ccq->cq_uk.shadow_area,
+ 0,
+ I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
+ wmb(); /* write shadow area before tail */
+ I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
+ * @cqp: struct for cqp hw
+ * @op_code: cqp opcode for completion
+ * @info: completion q entry to return
+ */
+static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
+ struct i40iw_sc_cqp *cqp,
+ u8 op_code,
+ struct i40iw_ccq_cqe_info *compl_info)
+{
+ struct i40iw_ccq_cqe_info info;
+ struct i40iw_sc_cq *ccq;
+ enum i40iw_status_code ret_code = 0;
+ u32 cnt = 0;
+
+ memset(&info, 0, sizeof(info));
+ ccq = cqp->dev->ccq;
+ while (1) {
+ if (cnt++ > I40IW_DONE_COUNT)
+ return I40IW_ERR_TIMEOUT;
+
+ if (i40iw_sc_ccq_get_cqe_info(ccq, &info)) {
+ udelay(I40IW_SLEEP_COUNT);
+ continue;
+ }
+
+ if (info.error) {
+ ret_code = I40IW_ERR_CQP_COMPL_ERROR;
+ break;
+ }
+ /* check if opcode is cq create */
+ if (op_code != info.op_code) {
+ i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
+ "%s: opcode mismatch for my op code 0x%x, returned opcode %x\n",
+ __func__, op_code, info.op_code);
+ }
+ /* success, exit out of the loop */
+ if (op_code == info.op_code)
+ break;
+ }
+
+ if (compl_info)
+ memcpy(compl_info, &info, sizeof(*compl_info));
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_manage_push_page - Handle push page
+ * @cqp: struct for cqp hw
+ * @info: push page info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_push_page(
+ struct i40iw_sc_cqp *cqp,
+ struct i40iw_cqp_manage_push_page_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT)
+ return I40IW_ERR_INVALID_PUSH_PAGE_INDEX;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16, info->qs_handle);
+
+ header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) |
+ LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+ LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_manage_hmc_pm_func_table - manage of function table
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @vf_index: vf index for cqp
+ * @free_pm_fcn: function number
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u8 vf_index,
+ bool free_pm_fcn,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ if (vf_index >= I40IW_MAX_VF_PER_PF)
+ return I40IW_ERR_INVALID_VF_ID;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ header = LS_64(vf_index, I40IW_CQPSQ_MHMC_VFIDX) |
+ LS_64(I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, I40IW_CQPSQ_OPCODE) |
+ LS_64(free_pm_fcn, I40IW_CQPSQ_MHMC_FREEPMFN) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_set_hmc_resource_profile - cqp wqe for hmc profile
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_profile_type: type of profile to set
+ * @vf_num: vf number for profile
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_set_hmc_resource_profile(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u8 hmc_profile_type,
+ u8 vf_num, bool post_sq,
+ bool poll_registers)
+{
+ u64 *wqe;
+ u64 header;
+ u32 val, tail, error;
+ enum i40iw_status_code ret_code = 0;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16,
+ (LS_64(hmc_profile_type, I40IW_CQPSQ_SHMCRP_HMC_PROFILE) |
+ LS_64(vf_num, I40IW_CQPSQ_SHMCRP_VFNUM)));
+
+ header = LS_64(I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+ if (error)
+ return I40IW_ERR_CQP_COMPL_ERROR;
+
+ if (post_sq) {
+ i40iw_sc_cqp_post_sq(cqp);
+ if (poll_registers)
+ ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000000);
+ else
+ ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
+ I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
+ NULL);
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_manage_hmc_pm_func_table_done - wait for cqp wqe completion for function table
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table_done(struct i40iw_sc_cqp *cqp)
+{
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, NULL);
+}
+
+/**
+ * i40iw_sc_commit_fpm_values_done - wait for cqp eqe completion for fpm commit
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cqp *cqp)
+{
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_COMMIT_FPM_VALUES, NULL);
+}
+
+/**
+ * i40iw_sc_commit_fpm_values - cqp wqe for commit fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @commit_fpm_mem; Memory for fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_commit_fpm_values(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u8 hmc_fn_id,
+ struct i40iw_dma_mem *commit_fpm_mem,
+ bool post_sq,
+ u8 wait_type)
+{
+ u64 *wqe;
+ u64 header;
+ u32 tail, val, error;
+ enum i40iw_status_code ret_code = 0;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16, hmc_fn_id);
+ set_64bit_val(wqe, 32, commit_fpm_mem->pa);
+
+ header = LS_64(I40IW_CQP_OP_COMMIT_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "COMMIT_FPM_VALUES WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+ if (error)
+ return I40IW_ERR_CQP_COMPL_ERROR;
+
+ if (post_sq) {
+ i40iw_sc_cqp_post_sq(cqp);
+
+ if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
+ ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+ else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
+ ret_code = i40iw_sc_commit_fpm_values_done(cqp);
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_query_fpm_values_done(struct i40iw_sc_cqp *cqp)
+{
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_QUERY_FPM_VALUES, NULL);
+}
+
+/**
+ * i40iw_sc_query_fpm_values - cqp wqe query fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @query_fpm_mem: memory for return fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_query_fpm_values(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u8 hmc_fn_id,
+ struct i40iw_dma_mem *query_fpm_mem,
+ bool post_sq,
+ u8 wait_type)
+{
+ u64 *wqe;
+ u64 header;
+ u32 tail, val, error;
+ enum i40iw_status_code ret_code = 0;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16, hmc_fn_id);
+ set_64bit_val(wqe, 32, query_fpm_mem->pa);
+
+ header = LS_64(I40IW_CQP_OP_QUERY_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_FPM WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ /* read the tail from CQP_TAIL register */
+ i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (error)
+ return I40IW_ERR_CQP_COMPL_ERROR;
+
+ if (post_sq) {
+ i40iw_sc_cqp_post_sq(cqp);
+ if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
+ ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+ else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
+ ret_code = i40iw_sc_query_fpm_values_done(cqp);
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_add_arp_cache_entry - cqp wqe add arp cache entry
+ * @cqp: struct for cqp hw
+ * @info: arp entry information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_add_arp_cache_entry(
+ struct i40iw_sc_cqp *cqp,
+ struct i40iw_add_arp_cache_entry_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 temp, header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 8, info->reach_max);
+
+ temp = info->mac_addr[5] |
+ LS_64_1(info->mac_addr[4], 8) |
+ LS_64_1(info->mac_addr[3], 16) |
+ LS_64_1(info->mac_addr[2], 24) |
+ LS_64_1(info->mac_addr[1], 32) |
+ LS_64_1(info->mac_addr[0], 40);
+
+ set_64bit_val(wqe, 16, temp);
+
+ header = info->arp_index |
+ LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+ LS_64((info->permanent ? 1 : 0), I40IW_CQPSQ_MAT_PERMANENT) |
+ LS_64(1, I40IW_CQPSQ_MAT_ENTRYVALID) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_ENTRY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_del_arp_cache_entry - dele arp cache entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_del_arp_cache_entry(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u16 arp_index,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ header = arp_index |
+ LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_DEL_ENTRY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_query_arp_cache_entry - cqp wqe to query arp and arp index
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_query_arp_cache_entry(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u16 arp_index,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ header = arp_index |
+ LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_MAT_QUERY) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_ARP_CACHE_ENTRY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_manage_apbvt_entry - for adding and deleting apbvt entries
+ * @cqp: struct for cqp hw
+ * @info: info for apbvt entry to add or delete
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_apbvt_entry(
+ struct i40iw_sc_cqp *cqp,
+ struct i40iw_apbvt_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16, info->port);
+
+ header = LS_64(I40IW_CQP_OP_MANAGE_APBVT, I40IW_CQPSQ_OPCODE) |
+ LS_64(info->add, I40IW_CQPSQ_MAPT_ADDPORT) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_APBVT WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_manage_qhash_table_entry - manage quad hash entries
+ * @cqp: struct for cqp hw
+ * @info: info for quad hash to manage
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ *
+ * This is called before connection establishment is started. For passive connections, when
+ * listener is created, it will call with entry type of I40IW_QHASH_TYPE_TCP_SYN with local
+ * ip address and tcp port. When SYN is received (passive connections) or
+ * sent (active connections), this routine is called with entry type of
+ * I40IW_QHASH_TYPE_TCP_ESTABLISHED and quad is passed in info.
+ *
+ * When iwarp connection is done and its state moves to RTS, the quad hash entry in
+ * the hardware will point to iwarp's qp number and requires no calls from the driver.
+ */
+static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
+ struct i40iw_sc_cqp *cqp,
+ struct i40iw_qhash_table_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 qw1 = 0;
+ u64 qw2 = 0;
+ u64 temp;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ temp = info->mac_addr[5] |
+ LS_64_1(info->mac_addr[4], 8) |
+ LS_64_1(info->mac_addr[3], 16) |
+ LS_64_1(info->mac_addr[2], 24) |
+ LS_64_1(info->mac_addr[1], 32) |
+ LS_64_1(info->mac_addr[0], 40);
+
+ set_64bit_val(wqe, 0, temp);
+
+ qw1 = LS_64(info->qp_num, I40IW_CQPSQ_QHASH_QPN) |
+ LS_64(info->dest_port, I40IW_CQPSQ_QHASH_DEST_PORT);
+ if (info->ipv4_valid) {
+ set_64bit_val(wqe,
+ 48,
+ LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
+ } else {
+ set_64bit_val(wqe,
+ 56,
+ LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
+ LS_64(info->dest_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
+
+ set_64bit_val(wqe,
+ 48,
+ LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
+ LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
+ }
+ qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
+ if (info->vlan_valid)
+ qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
+ set_64bit_val(wqe, 16, qw2);
+ if (info->entry_type == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
+ qw1 |= LS_64(info->src_port, I40IW_CQPSQ_QHASH_SRC_PORT);
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe,
+ 40,
+ LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
+ LS_64(info->src_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
+ set_64bit_val(wqe,
+ 32,
+ LS_64(info->src_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
+ LS_64(info->src_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
+ } else {
+ set_64bit_val(wqe,
+ 32,
+ LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
+ }
+ }
+
+ set_64bit_val(wqe, 8, qw1);
+ temp = LS_64(cqp->polarity, I40IW_CQPSQ_QHASH_WQEVALID) |
+ LS_64(I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY, I40IW_CQPSQ_QHASH_OPCODE) |
+ LS_64(info->manage, I40IW_CQPSQ_QHASH_MANAGE) |
+ LS_64(info->ipv4_valid, I40IW_CQPSQ_QHASH_IPV4VALID) |
+ LS_64(info->vlan_valid, I40IW_CQPSQ_QHASH_VLANVALID) |
+ LS_64(info->entry_type, I40IW_CQPSQ_QHASH_ENTRYTYPE);
+
+ i40iw_insert_wqe_hdr(wqe, temp);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_QHASH WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_alloc_local_mac_ipaddr_entry - cqp wqe for loc mac entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_alloc_local_mac_ipaddr_entry(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ header = LS_64(I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ALLOCATE_LOCAL_MAC_IPADDR WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_add_local_mac_ipaddr_entry - add mac enry
+ * @cqp: struct for cqp hw
+ * @info:mac addr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry(
+ struct i40iw_sc_cqp *cqp,
+ struct i40iw_local_mac_ipaddr_entry_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 temp, header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ temp = info->mac_addr[5] |
+ LS_64_1(info->mac_addr[4], 8) |
+ LS_64_1(info->mac_addr[3], 16) |
+ LS_64_1(info->mac_addr[2], 24) |
+ LS_64_1(info->mac_addr[1], 32) |
+ LS_64_1(info->mac_addr[0], 40);
+
+ set_64bit_val(wqe, 32, temp);
+
+ header = LS_64(info->entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
+ LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ADD_LOCAL_MAC_IPADDR WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_del_local_mac_ipaddr_entry - cqp wqe to dele local mac
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @entry_idx: index of mac entry
+ * @ ignore_ref_count: to force mac adde delete
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u8 entry_idx,
+ u8 ignore_ref_count,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ header = LS_64(entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
+ LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_MLIPA_FREEENTRY) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+ LS_64(ignore_ref_count, I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "DEL_LOCAL_MAC_IPADDR WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_cqp_nop - send a nop wqe
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cqp_nop(struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 header;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ header = LS_64(I40IW_CQP_OP_NOP, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "NOP WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_ceq_init - initialize ceq
+ * @ceq: ceq sc structure
+ * @info: ceq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_ceq_init(struct i40iw_sc_ceq *ceq,
+ struct i40iw_ceq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if ((info->elem_cnt < I40IW_MIN_CEQ_ENTRIES) ||
+ (info->elem_cnt > I40IW_MAX_CEQ_ENTRIES))
+ return I40IW_ERR_INVALID_SIZE;
+
+ if (info->ceq_id >= I40IW_MAX_CEQID)
+ return I40IW_ERR_INVALID_CEQ_ID;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ ceq->size = sizeof(*ceq);
+ ceq->ceqe_base = (struct i40iw_ceqe *)info->ceqe_base;
+ ceq->ceq_id = info->ceq_id;
+ ceq->dev = info->dev;
+ ceq->elem_cnt = info->elem_cnt;
+ ceq->ceq_elem_pa = info->ceqe_pa;
+ ceq->virtual_map = info->virtual_map;
+
+ ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
+ ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
+ ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
+
+ ceq->tph_en = info->tph_en;
+ ceq->tph_val = info->tph_val;
+ ceq->polarity = 1;
+ I40IW_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
+ ceq->dev->ceq[info->ceq_id] = ceq;
+
+ return 0;
+}
+
+/**
+ * i40iw_sc_ceq_create - create ceq wqe
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ceq_create(struct i40iw_sc_ceq *ceq,
+ u64 scratch,
+ bool post_sq)
+{
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+
+ cqp = ceq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 16, ceq->elem_cnt);
+ set_64bit_val(wqe, 32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
+ set_64bit_val(wqe, 48, (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56, LS_64(ceq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+ header = ceq->ceq_id |
+ LS_64(I40IW_CQP_OP_CREATE_CEQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
+ LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
+ LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_CREATE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_cceq_create_done - poll for control ceq wqe to complete
+ * @ceq: ceq sc structure
+ */
+static enum i40iw_status_code i40iw_sc_cceq_create_done(struct i40iw_sc_ceq *ceq)
+{
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = ceq->dev->cqp;
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CEQ, NULL);
+}
+
+/**
+ * i40iw_sc_cceq_destroy_done - poll for destroy cceq to complete
+ * @ceq: ceq sc structure
+ */
+static enum i40iw_status_code i40iw_sc_cceq_destroy_done(struct i40iw_sc_ceq *ceq)
+{
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = ceq->dev->cqp;
+ cqp->process_cqp_sds = i40iw_update_sds_noccq;
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_CEQ, NULL);
+}
+
+/**
+ * i40iw_sc_cceq_create - create cceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_cceq_create(struct i40iw_sc_ceq *ceq, u64 scratch)
+{
+ enum i40iw_status_code ret_code;
+
+ ret_code = i40iw_sc_ceq_create(ceq, scratch, true);
+ if (!ret_code)
+ ret_code = i40iw_sc_cceq_create_done(ceq);
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_ceq_destroy - destroy ceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ceq_destroy(struct i40iw_sc_ceq *ceq,
+ u64 scratch,
+ bool post_sq)
+{
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+
+ cqp = ceq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 16, ceq->elem_cnt);
+ set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+ header = ceq->ceq_id |
+ LS_64(I40IW_CQP_OP_DESTROY_CEQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
+ LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
+ LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_DESTROY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_process_ceq - process ceq
+ * @dev: sc device struct
+ * @ceq: ceq sc structure
+ */
+static void *i40iw_sc_process_ceq(struct i40iw_sc_dev *dev, struct i40iw_sc_ceq *ceq)
+{
+ u64 temp;
+ u64 *ceqe;
+ struct i40iw_sc_cq *cq = NULL;
+ u8 polarity;
+
+ ceqe = (u64 *)I40IW_GET_CURRENT_CEQ_ELEMENT(ceq);
+ get_64bit_val(ceqe, 0, &temp);
+ polarity = (u8)RS_64(temp, I40IW_CEQE_VALID);
+ if (polarity != ceq->polarity)
+ return cq;
+
+ cq = (struct i40iw_sc_cq *)(unsigned long)LS_64_1(temp, 1);
+
+ I40IW_RING_MOVE_TAIL(ceq->ceq_ring);
+ if (I40IW_RING_GETCURRENT_TAIL(ceq->ceq_ring) == 0)
+ ceq->polarity ^= 1;
+
+ if (dev->is_pf)
+ i40iw_wr32(dev->hw, I40E_PFPE_CQACK, cq->cq_uk.cq_id);
+ else
+ i40iw_wr32(dev->hw, I40E_VFPE_CQACK1, cq->cq_uk.cq_id);
+
+ return cq;
+}
+
+/**
+ * i40iw_sc_aeq_init - initialize aeq
+ * @aeq: aeq structure ptr
+ * @info: aeq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_aeq_init(struct i40iw_sc_aeq *aeq,
+ struct i40iw_aeq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if ((info->elem_cnt < I40IW_MIN_AEQ_ENTRIES) ||
+ (info->elem_cnt > I40IW_MAX_AEQ_ENTRIES))
+ return I40IW_ERR_INVALID_SIZE;
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ aeq->size = sizeof(*aeq);
+ aeq->polarity = 1;
+ aeq->aeqe_base = (struct i40iw_sc_aeqe *)info->aeqe_base;
+ aeq->dev = info->dev;
+ aeq->elem_cnt = info->elem_cnt;
+
+ aeq->aeq_elem_pa = info->aeq_elem_pa;
+ I40IW_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
+ info->dev->aeq = aeq;
+
+ aeq->virtual_map = info->virtual_map;
+ aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
+ aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
+ aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
+ info->dev->aeq = aeq;
+ return 0;
+}
+
+/**
+ * i40iw_sc_aeq_create - create aeq
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_aeq_create(struct i40iw_sc_aeq *aeq,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ cqp = aeq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 16, aeq->elem_cnt);
+ set_64bit_val(wqe, 32,
+ (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
+ set_64bit_val(wqe, 48,
+ (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
+
+ header = LS_64(I40IW_CQP_OP_CREATE_AEQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
+ LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_CREATE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_aeq_destroy - destroy aeq during close
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_aeq_destroy(struct i40iw_sc_aeq *aeq,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ cqp = aeq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 16, aeq->elem_cnt);
+ set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
+ header = LS_64(I40IW_CQP_OP_DESTROY_AEQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
+ LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_DESTROY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_get_next_aeqe - get next aeq entry
+ * @aeq: aeq structure ptr
+ * @info: aeqe info to be returned
+ */
+static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
+ struct i40iw_aeqe_info *info)
+{
+ u64 temp, compl_ctx;
+ u64 *aeqe;
+ u16 wqe_idx;
+ u8 ae_src;
+ u8 polarity;
+
+ aeqe = (u64 *)I40IW_GET_CURRENT_AEQ_ELEMENT(aeq);
+ get_64bit_val(aeqe, 0, &compl_ctx);
+ get_64bit_val(aeqe, 8, &temp);
+ polarity = (u8)RS_64(temp, I40IW_AEQE_VALID);
+
+ if (aeq->polarity != polarity)
+ return I40IW_ERR_QUEUE_EMPTY;
+
+ i40iw_debug_buf(aeq->dev, I40IW_DEBUG_WQE, "AEQ_ENTRY", aeqe, 16);
+
+ ae_src = (u8)RS_64(temp, I40IW_AEQE_AESRC);
+ wqe_idx = (u16)RS_64(temp, I40IW_AEQE_WQDESCIDX);
+ info->qp_cq_id = (u32)RS_64(temp, I40IW_AEQE_QPCQID);
+ info->ae_id = (u16)RS_64(temp, I40IW_AEQE_AECODE);
+ info->tcp_state = (u8)RS_64(temp, I40IW_AEQE_TCPSTATE);
+ info->iwarp_state = (u8)RS_64(temp, I40IW_AEQE_IWSTATE);
+ info->q2_data_written = (u8)RS_64(temp, I40IW_AEQE_Q2DATA);
+ info->aeqe_overflow = (bool)RS_64(temp, I40IW_AEQE_OVERFLOW);
+ switch (ae_src) {
+ case I40IW_AE_SOURCE_RQ:
+ case I40IW_AE_SOURCE_RQ_0011:
+ info->qp = true;
+ info->wqe_idx = wqe_idx;
+ info->compl_ctx = compl_ctx;
+ break;
+ case I40IW_AE_SOURCE_CQ:
+ case I40IW_AE_SOURCE_CQ_0110:
+ case I40IW_AE_SOURCE_CQ_1010:
+ case I40IW_AE_SOURCE_CQ_1110:
+ info->cq = true;
+ info->compl_ctx = LS_64_1(compl_ctx, 1);
+ break;
+ case I40IW_AE_SOURCE_SQ:
+ case I40IW_AE_SOURCE_SQ_0111:
+ info->qp = true;
+ info->sq = true;
+ info->wqe_idx = wqe_idx;
+ info->compl_ctx = compl_ctx;
+ break;
+ case I40IW_AE_SOURCE_IN_RR_WR:
+ case I40IW_AE_SOURCE_IN_RR_WR_1011:
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ info->in_rdrsp_wr = true;
+ break;
+ case I40IW_AE_SOURCE_OUT_RR:
+ case I40IW_AE_SOURCE_OUT_RR_1111:
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ info->out_rdrsp = true;
+ break;
+ default:
+ break;
+ }
+ I40IW_RING_MOVE_TAIL(aeq->aeq_ring);
+ if (I40IW_RING_GETCURRENT_TAIL(aeq->aeq_ring) == 0)
+ aeq->polarity ^= 1;
+ return 0;
+}
+
+/**
+ * i40iw_sc_repost_aeq_entries - repost completed aeq entries
+ * @dev: sc device struct
+ * @count: allocate count
+ */
+static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
+ u32 count)
+{
+ if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
+ return I40IW_ERR_INVALID_SIZE;
+
+ if (dev->is_pf)
+ i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
+ else
+ i40iw_wr32(dev->hw, I40E_VFPE_AEQALLOC1, count);
+
+ return 0;
+}
+
+/**
+ * i40iw_sc_aeq_create_done - create aeq
+ * @aeq: aeq structure ptr
+ */
+static enum i40iw_status_code i40iw_sc_aeq_create_done(struct i40iw_sc_aeq *aeq)
+{
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = aeq->dev->cqp;
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_AEQ, NULL);
+}
+
+/**
+ * i40iw_sc_aeq_destroy_done - destroy of aeq during close
+ * @aeq: aeq structure ptr
+ */
+static enum i40iw_status_code i40iw_sc_aeq_destroy_done(struct i40iw_sc_aeq *aeq)
+{
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = aeq->dev->cqp;
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_AEQ, NULL);
+}
+
+/**
+ * i40iw_sc_ccq_init - initialize control cq
+ * @cq: sc's cq ctruct
+ * @info: info for control cq initialization
+ */
+static enum i40iw_status_code i40iw_sc_ccq_init(struct i40iw_sc_cq *cq,
+ struct i40iw_ccq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->num_elem < I40IW_MIN_CQ_SIZE || info->num_elem > I40IW_MAX_CQ_SIZE)
+ return I40IW_ERR_INVALID_SIZE;
+
+ if (info->ceq_id > I40IW_MAX_CEQID)
+ return I40IW_ERR_INVALID_CEQ_ID;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ cq->cq_pa = info->cq_pa;
+ cq->cq_uk.cq_base = info->cq_base;
+ cq->shadow_area_pa = info->shadow_area_pa;
+ cq->cq_uk.shadow_area = info->shadow_area;
+ cq->shadow_read_threshold = info->shadow_read_threshold;
+ cq->dev = info->dev;
+ cq->ceq_id = info->ceq_id;
+ cq->cq_uk.cq_size = info->num_elem;
+ cq->cq_type = I40IW_CQ_TYPE_CQP;
+ cq->ceqe_mask = info->ceqe_mask;
+ I40IW_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
+
+ cq->cq_uk.cq_id = 0; /* control cq is id 0 always */
+ cq->ceq_id_valid = info->ceq_id_valid;
+ cq->tph_en = info->tph_en;
+ cq->tph_val = info->tph_val;
+ cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
+
+ cq->pbl_list = info->pbl_list;
+ cq->virtual_map = info->virtual_map;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->cq_uk.polarity = true;
+
+ /* following are only for iw cqs so initialize them to zero */
+ cq->cq_uk.cqe_alloc_reg = NULL;
+ info->dev->ccq = cq;
+ return 0;
+}
+
+/**
+ * i40iw_sc_ccq_create_done - poll cqp for ccq create
+ * @ccq: ccq sc struct
+ */
+static enum i40iw_status_code i40iw_sc_ccq_create_done(struct i40iw_sc_cq *ccq)
+{
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = ccq->dev->cqp;
+ return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CQ, NULL);
+}
+
+/**
+ * i40iw_sc_ccq_create - create control cq
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: overlow flag for ccq
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ccq_create(struct i40iw_sc_cq *ccq,
+ u64 scratch,
+ bool check_overflow,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ enum i40iw_status_code ret_code;
+
+ cqp = ccq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
+ set_64bit_val(wqe, 16,
+ LS_64(ccq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+ set_64bit_val(wqe, 32, (ccq->virtual_map ? 0 : ccq->cq_pa));
+ set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+ set_64bit_val(wqe, 48,
+ (ccq->virtual_map ? ccq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56,
+ LS_64(ccq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+ header = ccq->cq_uk.cq_id |
+ LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+ LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(ccq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+ LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+ LS_64(ccq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+ LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_CREATE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq) {
+ i40iw_sc_cqp_post_sq(cqp);
+ ret_code = i40iw_sc_ccq_create_done(ccq);
+ if (ret_code)
+ return ret_code;
+ }
+ cqp->process_cqp_sds = i40iw_cqp_sds_cmd;
+
+ return 0;
+}
+
+/**
+ * i40iw_sc_ccq_destroy - destroy ccq during close
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
+ u64 scratch,
+ bool post_sq)
+{
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+ enum i40iw_status_code ret_code = 0;
+ u32 tail, val, error;
+
+ cqp = ccq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
+ set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+
+ header = ccq->cq_uk.cq_id |
+ LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+ LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_DESTROY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+ if (error)
+ return I40IW_ERR_CQP_COMPL_ERROR;
+
+ if (post_sq) {
+ i40iw_sc_cqp_post_sq(cqp);
+ ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_cq_init - initialize completion q
+ * @cq: cq struct
+ * @info: cq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_cq_init(struct i40iw_sc_cq *cq,
+ struct i40iw_cq_init_info *info)
+{
+ u32 __iomem *cqe_alloc_reg = NULL;
+ enum i40iw_status_code ret_code;
+ u32 pble_obj_cnt;
+ u32 arm_offset;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ cq->cq_pa = info->cq_base_pa;
+ cq->dev = info->dev;
+ cq->ceq_id = info->ceq_id;
+ arm_offset = (info->dev->is_pf) ? I40E_PFPE_CQARM : I40E_VFPE_CQARM1;
+ if (i40iw_get_hw_addr(cq->dev))
+ cqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(cq->dev) +
+ arm_offset);
+ info->cq_uk_init_info.cqe_alloc_reg = cqe_alloc_reg;
+ ret_code = i40iw_cq_uk_init(&cq->cq_uk, &info->cq_uk_init_info);
+ if (ret_code)
+ return ret_code;
+ cq->virtual_map = info->virtual_map;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ cq->ceqe_mask = info->ceqe_mask;
+ cq->cq_type = (info->type) ? info->type : I40IW_CQ_TYPE_IWARP;
+
+ cq->shadow_area_pa = info->shadow_area_pa;
+ cq->shadow_read_threshold = info->shadow_read_threshold;
+
+ cq->ceq_id_valid = info->ceq_id_valid;
+ cq->tph_en = info->tph_en;
+ cq->tph_val = info->tph_val;
+
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+
+ return 0;
+}
+
+/**
+ * i40iw_sc_cq_create - create completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: flag for overflow check
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cq_create(struct i40iw_sc_cq *cq,
+ u64 scratch,
+ bool check_overflow,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ if (cq->cq_uk.cq_id > I40IW_MAX_CQID)
+ return I40IW_ERR_INVALID_CQ_ID;
+
+ if (cq->ceq_id > I40IW_MAX_CEQID)
+ return I40IW_ERR_INVALID_CEQ_ID;
+
+ cqp = cq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+ set_64bit_val(wqe,
+ 16,
+ LS_64(cq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+
+ set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+ header = cq->cq_uk.cq_id |
+ LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+ LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+ LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+ LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+ LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_CREATE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_cq_destroy - destroy completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq,
+ u64 scratch,
+ bool post_sq)
+{
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+
+ cqp = cq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+
+ header = cq->cq_uk.cq_id |
+ LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+ LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+ LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+ LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_DESTROY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_cq_modify - modify a Completion Queue
+ * @cq: cq struct
+ * @info: modification info struct
+ * @scratch:
+ * @post_sq: flag to post to sq
+ */
+static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq,
+ struct i40iw_modify_cq_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+ u32 cq_size, ceq_id, first_pm_pbl_idx;
+ u8 pbl_chunk_size;
+ bool virtual_map, ceq_id_valid, check_overflow;
+ u32 pble_obj_cnt;
+
+ if (info->ceq_valid && (info->ceq_id > I40IW_MAX_CEQID))
+ return I40IW_ERR_INVALID_CEQ_ID;
+
+ pble_obj_cnt = cq->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if (info->cq_resize && info->virtual_map &&
+ (info->first_pm_pbl_idx >= pble_obj_cnt))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ cqp = cq->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ cq->pbl_list = info->pbl_list;
+ cq->cq_pa = info->cq_pa;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+
+ cq_size = info->cq_resize ? info->cq_size : cq->cq_uk.cq_size;
+ if (info->ceq_change) {
+ ceq_id_valid = true;
+ ceq_id = info->ceq_id;
+ } else {
+ ceq_id_valid = cq->ceq_id_valid;
+ ceq_id = ceq_id_valid ? cq->ceq_id : 0;
+ }
+ virtual_map = info->cq_resize ? info->virtual_map : cq->virtual_map;
+ first_pm_pbl_idx = (info->cq_resize ?
+ (info->virtual_map ? info->first_pm_pbl_idx : 0) :
+ (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+ pbl_chunk_size = (info->cq_resize ?
+ (info->virtual_map ? info->pbl_chunk_size : 0) :
+ (cq->virtual_map ? cq->pbl_chunk_size : 0));
+ check_overflow = info->check_overflow_change ? info->check_overflow :
+ cq->check_overflow;
+ cq->cq_uk.cq_size = cq_size;
+ cq->ceq_id_valid = ceq_id_valid;
+ cq->ceq_id = ceq_id;
+ cq->virtual_map = virtual_map;
+ cq->first_pm_pbl_idx = first_pm_pbl_idx;
+ cq->pbl_chunk_size = pbl_chunk_size;
+ cq->check_overflow = check_overflow;
+
+ set_64bit_val(wqe, 0, cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+ set_64bit_val(wqe, 16,
+ LS_64(info->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+ set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48, (cq->virtual_map ? first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+ header = cq->cq_uk.cq_id |
+ LS_64(ceq_id, I40IW_CQPSQ_CQ_CEQID) |
+ LS_64(I40IW_CQP_OP_MODIFY_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(info->cq_resize, I40IW_CQPSQ_CQ_CQRESIZE) |
+ LS_64(pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+ LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+ LS_64(virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+ LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+ LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_MODIFY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_init - initialize qp
+ * @qp: sc qp
+ * @info: initialization qp info
+ */
+static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
+ struct i40iw_qp_init_info *info)
+{
+ u32 __iomem *wqe_alloc_reg = NULL;
+ enum i40iw_status_code ret_code;
+ u32 pble_obj_cnt;
+ u8 wqe_size;
+ u32 offset;
+
+ qp->dev = info->pd->dev;
+ qp->sq_pa = info->sq_pa;
+ qp->rq_pa = info->rq_pa;
+ qp->hw_host_ctx_pa = info->host_ctx_pa;
+ qp->q2_pa = info->q2_pa;
+ qp->shadow_area_pa = info->shadow_area_pa;
+
+ qp->q2_buf = info->q2;
+ qp->pd = info->pd;
+ qp->hw_host_ctx = info->host_ctx;
+ offset = (qp->pd->dev->is_pf) ? I40E_PFPE_WQEALLOC : I40E_VFPE_WQEALLOC1;
+ if (i40iw_get_hw_addr(qp->pd->dev))
+ wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+ offset);
+
+ info->qp_uk_init_info.wqe_alloc_reg = wqe_alloc_reg;
+ ret_code = i40iw_qp_uk_init(&qp->qp_uk, &info->qp_uk_init_info);
+ if (ret_code)
+ return ret_code;
+ qp->virtual_map = info->virtual_map;
+
+ pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if ((info->virtual_map && (info->sq_pa >= pble_obj_cnt)) ||
+ (info->virtual_map && (info->rq_pa >= pble_obj_cnt)))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ qp->llp_stream_handle = (void *)(-1);
+ qp->qp_type = (info->type) ? info->type : I40IW_QP_TYPE_IWARP;
+
+ qp->hw_sq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
+ false);
+ i40iw_debug(qp->dev, I40IW_DEBUG_WQE, "%s: hw_sq_size[%04d] sq_ring.size[%04d]\n",
+ __func__, qp->hw_sq_size, qp->qp_uk.sq_ring.size);
+ ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
+ &wqe_size);
+ if (ret_code)
+ return ret_code;
+ qp->hw_rq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.rq_size *
+ (wqe_size / I40IW_QP_WQE_MIN_SIZE), false);
+ i40iw_debug(qp->dev, I40IW_DEBUG_WQE,
+ "%s: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
+ __func__, qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
+ qp->sq_tph_val = info->sq_tph_val;
+ qp->rq_tph_val = info->rq_tph_val;
+ qp->sq_tph_en = info->sq_tph_en;
+ qp->rq_tph_en = info->rq_tph_en;
+ qp->rcv_tph_en = info->rcv_tph_en;
+ qp->xmit_tph_en = info->xmit_tph_en;
+ qp->qs_handle = qp->pd->dev->qs_handle;
+ qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
+
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_create - create qp
+ * @qp: sc qp
+ * @info: qp create info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_create(
+ struct i40iw_sc_qp *qp,
+ struct i40iw_create_qp_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+
+ if ((qp->qp_uk.qp_id < I40IW_MIN_IW_QP_ID) ||
+ (qp->qp_uk.qp_id > I40IW_MAX_IW_QP_ID))
+ return I40IW_ERR_INVALID_QP_ID;
+
+ cqp = qp->pd->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ header = qp->qp_uk.qp_id |
+ LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
+ LS_64((info->ord_valid ? 1 : 0), I40IW_CQPSQ_QP_ORDVALID) |
+ LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
+ LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+ LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
+ LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
+ LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
+ LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
+ LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_CREATE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_modify - modify qp cqp wqe
+ * @qp: sc qp
+ * @info: modify qp info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_modify(
+ struct i40iw_sc_qp *qp,
+ struct i40iw_modify_qp_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ u8 term_actions = 0;
+ u8 term_len = 0;
+
+ cqp = qp->pd->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ if (info->next_iwarp_state == I40IW_QP_STATE_TERMINATE) {
+ if (info->dont_send_fin)
+ term_actions += I40IWQP_TERM_SEND_TERM_ONLY;
+ if (info->dont_send_term)
+ term_actions += I40IWQP_TERM_SEND_FIN_ONLY;
+ if ((term_actions == I40IWQP_TERM_SEND_TERM_AND_FIN) ||
+ (term_actions == I40IWQP_TERM_SEND_TERM_ONLY))
+ term_len = info->termlen;
+ }
+
+ set_64bit_val(wqe,
+ 8,
+ LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
+ LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ header = qp->qp_uk.qp_id |
+ LS_64(I40IW_CQP_OP_MODIFY_QP, I40IW_CQPSQ_OPCODE) |
+ LS_64(info->ord_valid, I40IW_CQPSQ_QP_ORDVALID) |
+ LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
+ LS_64(info->cached_var_valid, I40IW_CQPSQ_QP_CACHEDVARVALID) |
+ LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
+ LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
+ LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
+ LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+ LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
+ LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
+ LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
+ LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
+ LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) |
+ LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
+ LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_MODIFY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_destroy - cqp destroy qp
+ * @qp: sc qp
+ * @scratch: u64 saved to be used during cqp completion
+ * @remove_hash_idx: flag if to remove hash idx
+ * @ignore_mw_bnd: memory window bind flag
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_destroy(
+ struct i40iw_sc_qp *qp,
+ u64 scratch,
+ bool remove_hash_idx,
+ bool ignore_mw_bnd,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ cqp = qp->pd->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ header = qp->qp_uk.qp_id |
+ LS_64(I40IW_CQP_OP_DESTROY_QP, I40IW_CQPSQ_OPCODE) |
+ LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+ LS_64(ignore_mw_bnd, I40IW_CQPSQ_QP_IGNOREMWBOUND) |
+ LS_64(remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_DESTROY WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_flush_wqes - flush qp's wqe
+ * @qp: sc qp
+ * @info: dlush information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
+ struct i40iw_sc_qp *qp,
+ struct i40iw_qp_flush_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 temp = 0;
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ bool flush_sq = false, flush_rq = false;
+
+ if (info->rq && !qp->flush_rq)
+ flush_rq = true;
+
+ if (info->sq && !qp->flush_sq)
+ flush_sq = true;
+
+ qp->flush_sq |= flush_sq;
+ qp->flush_rq |= flush_rq;
+ if (!flush_sq && !flush_rq) {
+ if (info->ae_code != I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR)
+ return 0;
+ }
+
+ cqp = qp->pd->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ if (info->userflushcode) {
+ if (flush_rq) {
+ temp |= LS_64(info->rq_minor_code, I40IW_CQPSQ_FWQE_RQMNERR) |
+ LS_64(info->rq_major_code, I40IW_CQPSQ_FWQE_RQMJERR);
+ }
+ if (flush_sq) {
+ temp |= LS_64(info->sq_minor_code, I40IW_CQPSQ_FWQE_SQMNERR) |
+ LS_64(info->sq_major_code, I40IW_CQPSQ_FWQE_SQMJERR);
+ }
+ }
+ set_64bit_val(wqe, 16, temp);
+
+ temp = (info->generate_ae) ?
+ info->ae_code | LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE) : 0;
+
+ set_64bit_val(wqe, 8, temp);
+
+ header = qp->qp_uk.qp_id |
+ LS_64(I40IW_CQP_OP_FLUSH_WQES, I40IW_CQPSQ_OPCODE) |
+ LS_64(info->generate_ae, I40IW_CQPSQ_FWQE_GENERATE_AE) |
+ LS_64(info->userflushcode, I40IW_CQPSQ_FWQE_USERFLCODE) |
+ LS_64(flush_sq, I40IW_CQPSQ_FWQE_FLUSHSQ) |
+ LS_64(flush_rq, I40IW_CQPSQ_FWQE_FLUSHRQ) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_FLUSH WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_upload_context - upload qp's context
+ * @dev: sc device struct
+ * @info: upload context info ptr for return
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_upload_context(
+ struct i40iw_sc_dev *dev,
+ struct i40iw_upload_context_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 16, info->buf_pa);
+
+ header = LS_64(info->qp_id, I40IW_CQPSQ_UCTX_QPID) |
+ LS_64(I40IW_CQP_OP_UPLOAD_CONTEXT, I40IW_CQPSQ_OPCODE) |
+ LS_64(info->qp_type, I40IW_CQPSQ_UCTX_QPTYPE) |
+ LS_64(info->raw_format, I40IW_CQPSQ_UCTX_RAWFORMAT) |
+ LS_64(info->freeze_qp, I40IW_CQPSQ_UCTX_FREEZEQP) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QP_UPLOAD_CTX WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_qp_setctx - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static enum i40iw_status_code i40iw_sc_qp_setctx(
+ struct i40iw_sc_qp *qp,
+ u64 *qp_ctx,
+ struct i40iw_qp_host_ctx_info *info)
+{
+ struct i40iwarp_offload_info *iw;
+ struct i40iw_tcp_offload_info *tcp;
+ u64 qw0, qw3, qw7 = 0;
+
+ iw = info->iwarp_info;
+ tcp = info->tcp_info;
+ qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
+ LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
+ LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
+ LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) |
+ LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) |
+ LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) |
+ LS_64(info->push_idx, I40IWQPC_PPIDX) |
+ LS_64(info->push_mode_en, I40IWQPC_PMENA);
+
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+ qw3 = LS_64(qp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
+ LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
+ LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE);
+
+ set_64bit_val(qp_ctx,
+ 128,
+ LS_64(info->err_rq_idx, I40IWQPC_ERR_RQ_IDX));
+
+ set_64bit_val(qp_ctx,
+ 136,
+ LS_64(info->send_cq_num, I40IWQPC_TXCQNUM) |
+ LS_64(info->rcv_cq_num, I40IWQPC_RXCQNUM));
+
+ set_64bit_val(qp_ctx,
+ 168,
+ LS_64(info->qp_compl_ctx, I40IWQPC_QPCOMPCTX));
+ set_64bit_val(qp_ctx,
+ 176,
+ LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
+ LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
+ LS_64(qp->qs_handle, I40IWQPC_QSHANDLE) |
+ LS_64(qp->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE));
+
+ if (info->iwarp_info_valid) {
+ qw0 |= LS_64(iw->ddp_ver, I40IWQPC_DDP_VER) |
+ LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
+
+ qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
+ set_64bit_val(qp_ctx, 144, qp->q2_pa);
+ set_64bit_val(qp_ctx,
+ 152,
+ LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
+
+ /*
+ * Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an
+ *advertisable IRD of 64
+ */
+ iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD;
+ set_64bit_val(qp_ctx,
+ 160,
+ LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
+ LS_64(iw->ird_size, I40IWQPC_IRDSIZE) |
+ LS_64(iw->wr_rdresp_en, I40IWQPC_WRRDRSPOK) |
+ LS_64(iw->rd_enable, I40IWQPC_RDOK) |
+ LS_64(iw->snd_mark_en, I40IWQPC_SNDMARKERS) |
+ LS_64(iw->bind_en, I40IWQPC_BINDEN) |
+ LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
+ LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
+ LS_64(1, I40IWQPC_IWARPMODE) |
+ LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
+ LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
+ LS_64(iw->rcv_no_mpa_crc, I40IWQPC_RCVNOMPACRC) |
+ LS_64(iw->rcv_mark_offset, I40IWQPC_RCVMARKOFFSET) |
+ LS_64(iw->snd_mark_offset, I40IWQPC_SNDMARKOFFSET));
+ }
+ if (info->tcp_info_valid) {
+ qw0 |= LS_64(tcp->ipv4, I40IWQPC_IPV4) |
+ LS_64(tcp->no_nagle, I40IWQPC_NONAGLE) |
+ LS_64(tcp->insert_vlan_tag, I40IWQPC_INSERTVLANTAG) |
+ LS_64(tcp->time_stamp, I40IWQPC_TIMESTAMP) |
+ LS_64(tcp->cwnd_inc_limit, I40IWQPC_LIMIT) |
+ LS_64(tcp->drop_ooo_seg, I40IWQPC_DROPOOOSEG) |
+ LS_64(tcp->dup_ack_thresh, I40IWQPC_DUPACK_THRESH);
+
+ qw3 |= LS_64(tcp->ttl, I40IWQPC_TTL) |
+ LS_64(tcp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
+ LS_64(tcp->avoid_stretch_ack, I40IWQPC_AVOIDSTRETCHACK) |
+ LS_64(tcp->tos, I40IWQPC_TOS) |
+ LS_64(tcp->src_port, I40IWQPC_SRCPORTNUM) |
+ LS_64(tcp->dst_port, I40IWQPC_DESTPORTNUM);
+
+ qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
+ set_64bit_val(qp_ctx,
+ 32,
+ LS_64(tcp->dest_ip_addr2, I40IWQPC_DESTIPADDR2) |
+ LS_64(tcp->dest_ip_addr3, I40IWQPC_DESTIPADDR3));
+
+ set_64bit_val(qp_ctx,
+ 40,
+ LS_64(tcp->dest_ip_addr0, I40IWQPC_DESTIPADDR0) |
+ LS_64(tcp->dest_ip_addr1, I40IWQPC_DESTIPADDR1));
+
+ set_64bit_val(qp_ctx,
+ 48,
+ LS_64(tcp->snd_mss, I40IWQPC_SNDMSS) |
+ LS_64(tcp->vlan_tag, I40IWQPC_VLANTAG) |
+ LS_64(tcp->arp_idx, I40IWQPC_ARPIDX));
+
+ qw7 |= LS_64(tcp->flow_label, I40IWQPC_FLOWLABEL) |
+ LS_64(tcp->wscale, I40IWQPC_WSCALE) |
+ LS_64(tcp->ignore_tcp_opt, I40IWQPC_IGNORE_TCP_OPT) |
+ LS_64(tcp->ignore_tcp_uns_opt, I40IWQPC_IGNORE_TCP_UNS_OPT) |
+ LS_64(tcp->tcp_state, I40IWQPC_TCPSTATE) |
+ LS_64(tcp->rcv_wscale, I40IWQPC_RCVSCALE) |
+ LS_64(tcp->snd_wscale, I40IWQPC_SNDSCALE);
+
+ set_64bit_val(qp_ctx,
+ 72,
+ LS_64(tcp->time_stamp_recent, I40IWQPC_TIMESTAMP_RECENT) |
+ LS_64(tcp->time_stamp_age, I40IWQPC_TIMESTAMP_AGE));
+ set_64bit_val(qp_ctx,
+ 80,
+ LS_64(tcp->snd_nxt, I40IWQPC_SNDNXT) |
+ LS_64(tcp->snd_wnd, I40IWQPC_SNDWND));
+
+ set_64bit_val(qp_ctx,
+ 88,
+ LS_64(tcp->rcv_nxt, I40IWQPC_RCVNXT) |
+ LS_64(tcp->rcv_wnd, I40IWQPC_RCVWND));
+ set_64bit_val(qp_ctx,
+ 96,
+ LS_64(tcp->snd_max, I40IWQPC_SNDMAX) |
+ LS_64(tcp->snd_una, I40IWQPC_SNDUNA));
+ set_64bit_val(qp_ctx,
+ 104,
+ LS_64(tcp->srtt, I40IWQPC_SRTT) |
+ LS_64(tcp->rtt_var, I40IWQPC_RTTVAR));
+ set_64bit_val(qp_ctx,
+ 112,
+ LS_64(tcp->ss_thresh, I40IWQPC_SSTHRESH) |
+ LS_64(tcp->cwnd, I40IWQPC_CWND));
+ set_64bit_val(qp_ctx,
+ 120,
+ LS_64(tcp->snd_wl1, I40IWQPC_SNDWL1) |
+ LS_64(tcp->snd_wl2, I40IWQPC_SNDWL2));
+ set_64bit_val(qp_ctx,
+ 128,
+ LS_64(tcp->max_snd_window, I40IWQPC_MAXSNDWND) |
+ LS_64(tcp->rexmit_thresh, I40IWQPC_REXMIT_THRESH));
+ set_64bit_val(qp_ctx,
+ 184,
+ LS_64(tcp->local_ipaddr3, I40IWQPC_LOCAL_IPADDR3) |
+ LS_64(tcp->local_ipaddr2, I40IWQPC_LOCAL_IPADDR2));
+ set_64bit_val(qp_ctx,
+ 192,
+ LS_64(tcp->local_ipaddr1, I40IWQPC_LOCAL_IPADDR1) |
+ LS_64(tcp->local_ipaddr0, I40IWQPC_LOCAL_IPADDR0));
+ }
+
+ set_64bit_val(qp_ctx, 0, qw0);
+ set_64bit_val(qp_ctx, 24, qw3);
+ set_64bit_val(qp_ctx, 56, qw7);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "QP_HOST)CTX WQE",
+ qp_ctx, I40IW_QP_CTX_SIZE);
+ return 0;
+}
+
+/**
+ * i40iw_sc_alloc_stag - mr stag alloc
+ * @dev: sc device struct
+ * @info: stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_alloc_stag(
+ struct i40iw_sc_dev *dev,
+ struct i40iw_allocate_stag_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe,
+ 8,
+ LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID) |
+ LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN));
+ set_64bit_val(wqe,
+ 16,
+ LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+ set_64bit_val(wqe,
+ 40,
+ LS_64(info->hmc_fcn_index, I40IW_CQPSQ_STAG_HMCFNIDX));
+
+ header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_STAG_MR) |
+ LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+ LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
+ LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+ LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+ LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
+ LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "ALLOC_STAG WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_mr_reg_non_shared - non-shared mr registration
+ * @dev: sc device struct
+ * @info: mr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
+ struct i40iw_sc_dev *dev,
+ struct i40iw_reg_ns_stag_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 temp;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ u32 pble_obj_cnt;
+ bool remote_access;
+ u8 addr_type;
+
+ if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
+ I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+ remote_access = true;
+ else
+ remote_access = false;
+
+ pble_obj_cnt = dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+ if (info->chunk_size && (info->first_pm_pbl_index >= pble_obj_cnt))
+ return I40IW_ERR_INVALID_PBLE_INDEX;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
+ set_64bit_val(wqe, 0, temp);
+
+ set_64bit_val(wqe,
+ 8,
+ LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN) |
+ LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+
+ set_64bit_val(wqe,
+ 16,
+ LS_64(info->stag_key, I40IW_CQPSQ_STAG_KEY) |
+ LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+ if (!info->chunk_size) {
+ set_64bit_val(wqe, 32, info->reg_addr_pa);
+ set_64bit_val(wqe, 48, 0);
+ } else {
+ set_64bit_val(wqe, 32, 0);
+ set_64bit_val(wqe, 48, info->first_pm_pbl_index);
+ }
+ set_64bit_val(wqe, 40, info->hmc_fcn_index);
+ set_64bit_val(wqe, 56, 0);
+
+ addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
+ header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_STAG_MR) |
+ LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
+ LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+ LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+ LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+ LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
+ LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
+ LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_NS WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_mr_reg_shared - registered shared memory region
+ * @dev: sc device struct
+ * @info: info for shared memory registeration
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mr_reg_shared(
+ struct i40iw_sc_dev *dev,
+ struct i40iw_register_shared_stag *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 temp, va64, fbo, header;
+ u32 va32;
+ bool remote_access;
+ u8 addr_type;
+
+ if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
+ I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+ remote_access = true;
+ else
+ remote_access = false;
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ va64 = (uintptr_t)(info->va);
+ va32 = (u32)(va64 & 0x00000000FFFFFFFF);
+ fbo = (u64)(va32 & (4096 - 1));
+
+ set_64bit_val(wqe,
+ 0,
+ (info->addr_type == I40IW_ADDR_TYPE_VA_BASED ? (uintptr_t)info->va : fbo));
+
+ set_64bit_val(wqe,
+ 8,
+ LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+ temp = LS_64(info->new_stag_key, I40IW_CQPSQ_STAG_KEY) |
+ LS_64(info->new_stag_idx, I40IW_CQPSQ_STAG_IDX) |
+ LS_64(info->parent_stag_idx, I40IW_CQPSQ_STAG_PARENTSTAGIDX);
+ set_64bit_val(wqe, 16, temp);
+
+ addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
+ header = LS_64(I40IW_CQP_OP_REG_SMR, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_STAG_MR) |
+ LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+ LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+ LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_SHARED WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_dealloc_stag - deallocate stag
+ * @dev: sc device struct
+ * @info: dealloc stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_dealloc_stag(
+ struct i40iw_sc_dev *dev,
+ struct i40iw_dealloc_stag_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 header;
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe,
+ 8,
+ LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe,
+ 16,
+ LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+
+ header = LS_64(I40IW_CQP_OP_DEALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+ LS_64(info->mr, I40IW_CQPSQ_STAG_MR) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "DEALLOC_STAG WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_query_stag - query hardware for stag
+ * @dev: sc device struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @stag_index: stag index for query
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_query_stag(struct i40iw_sc_dev *dev,
+ u64 scratch,
+ u32 stag_index,
+ bool post_sq)
+{
+ u64 header;
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe,
+ 16,
+ LS_64(stag_index, I40IW_CQPSQ_QUERYSTAG_IDX));
+
+ header = LS_64(I40IW_CQP_OP_QUERY_STAG, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QUERY_STAG WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_mw_alloc - mw allocate
+ * @dev: sc device struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @mw_stag_index:stag index
+ * @pd_id: pd is for this mw
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mw_alloc(
+ struct i40iw_sc_dev *dev,
+ u64 scratch,
+ u32 mw_stag_index,
+ u16 pd_id,
+ bool post_sq)
+{
+ u64 header;
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe, 8, LS_64(pd_id, I40IW_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe,
+ 16,
+ LS_64(mw_stag_index, I40IW_CQPSQ_STAG_IDX));
+
+ header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MW_ALLOC WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_send_lsmm - send last streaming mode message
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ * @stag: stag of lsmm buffer
+ */
+static void i40iw_sc_send_lsmm(struct i40iw_sc_qp *qp,
+ void *lsmm_buf,
+ u32 size,
+ i40iw_stag stag)
+{
+ u64 *wqe;
+ u64 header;
+ struct i40iw_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+
+ set_64bit_val(wqe, 8, (size | LS_64(stag, I40IWQPSQ_FRAG_STAG)));
+
+ set_64bit_val(wqe, 16, 0);
+
+ header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+ LS_64(1, I40IWQPSQ_STREAMMODE) |
+ LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_QP, "SEND_LSMM WQE",
+ wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_send_lsmm_nostag - for privilege qp
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ */
+static void i40iw_sc_send_lsmm_nostag(struct i40iw_sc_qp *qp,
+ void *lsmm_buf,
+ u32 size)
+{
+ u64 *wqe;
+ u64 header;
+ struct i40iw_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+
+ set_64bit_val(wqe, 8, size);
+
+ set_64bit_val(wqe, 16, 0);
+
+ header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+ LS_64(1, I40IWQPSQ_STREAMMODE) |
+ LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "SEND_LSMM_NOSTAG WQE",
+ wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_send_rtt - send last read0 or write0
+ * @qp: sc qp struct
+ * @read: Do read0 or write0
+ */
+static void i40iw_sc_send_rtt(struct i40iw_sc_qp *qp, bool read)
+{
+ u64 *wqe;
+ u64 header;
+ struct i40iw_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+ if (read) {
+ header = LS_64(0x1234, I40IWQPSQ_REMSTAG) |
+ LS_64(I40IWQP_OP_RDMA_READ, I40IWQPSQ_OPCODE) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+ set_64bit_val(wqe, 8, ((u64)0xabcd << 32));
+ } else {
+ header = LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+ }
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "RTR WQE",
+ wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_post_wqe0 - send wqe with opcode
+ * @qp: sc qp struct
+ * @opcode: opcode to use for wqe0
+ */
+static enum i40iw_status_code i40iw_sc_post_wqe0(struct i40iw_sc_qp *qp, u8 opcode)
+{
+ u64 *wqe;
+ u64 header;
+ struct i40iw_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+ switch (opcode) {
+ case I40IWQP_OP_NOP:
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+ header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ break;
+ case I40IWQP_OP_RDMA_SEND:
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+ header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID) |
+ LS_64(1, I40IWQPSQ_STREAMMODE) |
+ LS_64(1, I40IWQPSQ_WAITFORRCVPDU);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+ break;
+ default:
+ i40iw_debug(qp->dev, I40IW_DEBUG_QP, "%s: Invalid WQE zero opcode\n",
+ __func__);
+ break;
+ }
+ return 0;
+}
+
+/**
+ * i40iw_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
+ * @dev : ptr to i40iw_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
+{
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_dma_mem query_fpm_mem;
+ struct i40iw_virt_mem virt_mem;
+ struct i40iw_vfdev *vf_dev = NULL;
+ u32 mem_size;
+ enum i40iw_status_code ret_code = 0;
+ bool poll_registers = true;
+ u16 iw_vf_idx;
+ u8 wait_type;
+
+ if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
+ (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
+ return I40IW_ERR_INVALID_HMCFN_ID;
+
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "hmc_fn_id %u, dev->hmc_fn_id %u\n", hmc_fn_id,
+ dev->hmc_fn_id);
+ if (hmc_fn_id == dev->hmc_fn_id) {
+ hmc_info = dev->hmc_info;
+ query_fpm_mem.pa = dev->fpm_query_buf_pa;
+ query_fpm_mem.va = dev->fpm_query_buf;
+ } else {
+ vf_dev = i40iw_vfdev_from_fpm(dev, hmc_fn_id);
+ if (!vf_dev)
+ return I40IW_ERR_INVALID_VF_ID;
+
+ hmc_info = &vf_dev->hmc_info;
+ iw_vf_idx = vf_dev->iw_vf_idx;
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "vf_dev %p, hmc_info %p, hmc_obj %p\n", vf_dev,
+ hmc_info, hmc_info->hmc_obj);
+ if (!vf_dev->fpm_query_buf) {
+ if (!dev->vf_fpm_query_buf[iw_vf_idx].va) {
+ ret_code = i40iw_alloc_query_fpm_buf(dev,
+ &dev->vf_fpm_query_buf[iw_vf_idx]);
+ if (ret_code)
+ return ret_code;
+ }
+ vf_dev->fpm_query_buf = dev->vf_fpm_query_buf[iw_vf_idx].va;
+ vf_dev->fpm_query_buf_pa = dev->vf_fpm_query_buf[iw_vf_idx].pa;
+ }
+ query_fpm_mem.pa = vf_dev->fpm_query_buf_pa;
+ query_fpm_mem.va = vf_dev->fpm_query_buf;
+ /**
+ * It is HARDWARE specific:
+ * this call is done by PF for VF and
+ * i40iw_sc_query_fpm_values needs ccq poll
+ * because PF ccq is already created.
+ */
+ poll_registers = false;
+ }
+
+ hmc_info->hmc_fn_id = hmc_fn_id;
+
+ if (hmc_fn_id != dev->hmc_fn_id) {
+ ret_code =
+ i40iw_cqp_query_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
+ } else {
+ wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
+ (u8)I40IW_CQP_WAIT_POLL_CQ;
+
+ ret_code = i40iw_sc_query_fpm_values(
+ dev->cqp,
+ 0,
+ hmc_info->hmc_fn_id,
+ &query_fpm_mem,
+ true,
+ wait_type);
+ }
+ if (ret_code)
+ return ret_code;
+
+ /* parse the fpm_query_buf and fill hmc obj info */
+ ret_code =
+ i40iw_sc_parse_fpm_query_buf((u64 *)query_fpm_mem.va,
+ hmc_info,
+ &dev->hmc_fpm_misc);
+ if (ret_code)
+ return ret_code;
+ i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "QUERY FPM BUFFER",
+ query_fpm_mem.va, I40IW_QUERY_FPM_BUF_SIZE);
+
+ if (hmc_fn_id != dev->hmc_fn_id) {
+ i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
+
+ /* parse the fpm_commit_buf and fill hmc obj info */
+ i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj);
+ mem_size = sizeof(struct i40iw_hmc_sd_entry) *
+ (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
+ ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
+ if (ret_code)
+ return ret_code;
+ hmc_info->sd_table.sd_entry = virt_mem.va;
+ }
+
+ /* fill size of objects which are fixed */
+ hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].size = 4;
+ hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].size = 4;
+ hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size = 8;
+ hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].size = 8192;
+ hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_configure_iw_fpm() - commits hmc obj cnt values using cqp command and
+ * populates fpm base address in hmc_info
+ * @dev : ptr to i40iw_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev,
+ u8 hmc_fn_id)
+{
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_hmc_obj_info *obj_info;
+ u64 *buf;
+ struct i40iw_dma_mem commit_fpm_mem;
+ u32 i, j;
+ enum i40iw_status_code ret_code = 0;
+ bool poll_registers = true;
+ u8 wait_type;
+
+ if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
+ (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
+ return I40IW_ERR_INVALID_HMCFN_ID;
+
+ if (hmc_fn_id == dev->hmc_fn_id) {
+ hmc_info = dev->hmc_info;
+ } else {
+ hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, hmc_fn_id);
+ poll_registers = false;
+ }
+ if (!hmc_info)
+ return I40IW_ERR_BAD_PTR;
+
+ obj_info = hmc_info->hmc_obj;
+ buf = dev->fpm_commit_buf;
+
+ /* copy cnt values in commit buf */
+ for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE;
+ i++, j += 8)
+ set_64bit_val(buf, j, (u64)obj_info[i].cnt);
+
+ set_64bit_val(buf, 40, 0); /* APBVT rsvd */
+
+ commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
+ commit_fpm_mem.va = dev->fpm_commit_buf;
+ wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
+ (u8)I40IW_CQP_WAIT_POLL_CQ;
+ ret_code = i40iw_sc_commit_fpm_values(
+ dev->cqp,
+ 0,
+ hmc_info->hmc_fn_id,
+ &commit_fpm_mem,
+ true,
+ wait_type);
+
+ /* parse the fpm_commit_buf and fill hmc obj info */
+ if (!ret_code)
+ ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
+ commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
+
+ return ret_code;
+}
+
+/**
+ * cqp_sds_wqe_fill - fill cqp wqe doe sd
+ * @cqp: struct for cqp hw
+ * @info; sd info for wqe
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
+ struct i40iw_update_sds_info *info,
+ u64 scratch)
+{
+ u64 data;
+ u64 header;
+ u64 *wqe;
+ int mem_entries, wqe_entries;
+ struct i40iw_dma_mem *sdbuf = &cqp->sdbuf;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ I40IW_CQP_INIT_WQE(wqe);
+ wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
+ mem_entries = info->cnt - wqe_entries;
+
+ header = LS_64(I40IW_CQP_OP_UPDATE_PE_SDS, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+ LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT);
+
+ if (mem_entries) {
+ memcpy(sdbuf->va, &info->entry[3], (mem_entries << 4));
+ data = sdbuf->pa;
+ } else {
+ data = 0;
+ }
+ data |= LS_64(info->hmc_fn_id, I40IW_CQPSQ_UPESD_HMCFNID);
+
+ set_64bit_val(wqe, 16, data);
+
+ switch (wqe_entries) {
+ case 3:
+ set_64bit_val(wqe, 48,
+ (LS_64(info->entry[2].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
+ LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
+
+ set_64bit_val(wqe, 56, info->entry[2].data);
+ /* fallthrough */
+ case 2:
+ set_64bit_val(wqe, 32,
+ (LS_64(info->entry[1].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
+ LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
+
+ set_64bit_val(wqe, 40, info->entry[1].data);
+ /* fallthrough */
+ case 1:
+ set_64bit_val(wqe, 0,
+ LS_64(info->entry[0].cmd, I40IW_CQPSQ_UPESD_SDCMD));
+
+ set_64bit_val(wqe, 8, info->entry[0].data);
+ break;
+ default:
+ break;
+ }
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "UPDATE_PE_SDS WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+ return 0;
+}
+
+/**
+ * i40iw_update_pe_sds - cqp wqe for sd
+ * @dev: ptr to i40iw_dev struct
+ * @info: sd info for sd's
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code i40iw_update_pe_sds(struct i40iw_sc_dev *dev,
+ struct i40iw_update_sds_info *info,
+ u64 scratch)
+{
+ struct i40iw_sc_cqp *cqp = dev->cqp;
+ enum i40iw_status_code ret_code;
+
+ ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
+ if (!ret_code)
+ i40iw_sc_cqp_post_sq(cqp);
+
+ return ret_code;
+}
+
+/**
+ * i40iw_update_sds_noccq - update sd before ccq created
+ * @dev: sc device struct
+ * @info: sd info for sd's
+ */
+enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
+ struct i40iw_update_sds_info *info)
+{
+ u32 error, val, tail;
+ struct i40iw_sc_cqp *cqp = dev->cqp;
+ enum i40iw_status_code ret_code;
+
+ ret_code = cqp_sds_wqe_fill(cqp, info, 0);
+ if (ret_code)
+ return ret_code;
+ i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+ if (error)
+ return I40IW_ERR_CQP_COMPL_ERROR;
+
+ i40iw_sc_cqp_post_sq(cqp);
+ ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+
+ return ret_code;
+}
+
+/**
+ * i40iw_sc_suspend_qp - suspend qp for param change
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
+ struct i40iw_sc_qp *qp,
+ u64 scratch)
+{
+ u64 header;
+ u64 *wqe;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_SUSPENDQP_QPID) |
+ LS_64(I40IW_CQP_OP_SUSPEND_QP, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SUSPEND_QP WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_resume_qp - resume qp after suspend
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
+ struct i40iw_sc_qp *qp,
+ u64 scratch)
+{
+ u64 header;
+ u64 *wqe;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe,
+ 16,
+ LS_64(qp->qs_handle, I40IW_CQPSQ_RESUMEQP_QSHANDLE));
+
+ header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_RESUMEQP_QPID) |
+ LS_64(I40IW_CQP_OP_RESUME_QP, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "RESUME_QP WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * i40iw_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(
+ struct i40iw_sc_cqp *cqp,
+ u64 scratch,
+ u8 hmc_fn_id,
+ bool post_sq,
+ bool poll_registers)
+{
+ u64 header;
+ u64 *wqe;
+ u32 tail, val, error;
+ enum i40iw_status_code ret_code = 0;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+ set_64bit_val(wqe,
+ 16,
+ LS_64(hmc_fn_id, I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID));
+
+ header = LS_64(I40IW_CQP_OP_SHMC_PAGES_ALLOCATED, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SHMC_PAGES_ALLOCATED WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+ i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+ if (error) {
+ ret_code = I40IW_ERR_CQP_COMPL_ERROR;
+ return ret_code;
+ }
+ if (post_sq) {
+ i40iw_sc_cqp_post_sq(cqp);
+ if (poll_registers)
+ /* check for cqp sq tail update */
+ ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
+ else
+ ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
+ I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
+ NULL);
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_ring_full - check if cqp ring is full
+ * @cqp: struct for cqp hw
+ */
+static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
+{
+ return I40IW_RING_FULL_ERR(cqp->sq_ring);
+}
+
+/**
+ * i40iw_config_fpm_values - configure HMC objects
+ * @dev: sc device struct
+ * @qp_count: desired qp count
+ */
+enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count)
+{
+ struct i40iw_virt_mem virt_mem;
+ u32 i, mem_size;
+ u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
+ u32 powerof2;
+ u64 sd_needed, bytes_needed;
+ u32 loop_count = 0;
+
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_hmc_fpm_misc *hmc_fpm_misc;
+ enum i40iw_status_code ret_code = 0;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ ret_code = i40iw_sc_init_iw_hmc(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "i40iw_sc_init_iw_hmc returned error_code = %d\n",
+ ret_code);
+ return ret_code;
+ }
+
+ bytes_needed = 0;
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+ bytes_needed +=
+ (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size);
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n",
+ __func__, i, hmc_info->hmc_obj[i].max_cnt,
+ hmc_info->hmc_obj[i].size);
+ }
+ sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
+ __func__, sd_needed, hmc_info->first_sd_index);
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n",
+ __func__, bytes_needed, hmc_info->sd_table.sd_cnt,
+ hmc_fpm_misc->max_sds);
+
+ qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
+ qpwantedoriginal = qpwanted;
+ mrwanted = hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt;
+ pblewanted = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt;
+
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d\n",
+ qp_count, hmc_fpm_misc->max_sds,
+ hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt,
+ hmc_info->hmc_obj[I40IW_HMC_IW_CQ].max_cnt,
+ hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt,
+ hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt);
+
+ do {
+ ++loop_count;
+ hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt = qpwanted;
+ hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt =
+ min(2 * qpwanted, hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt);
+ hmc_info->hmc_obj[I40IW_HMC_IW_SRQ].cnt = 0x00; /* Reserved */
+ hmc_info->hmc_obj[I40IW_HMC_IW_HTE].cnt =
+ qpwanted * hmc_fpm_misc->ht_multiplier;
+ hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt =
+ hmc_info->hmc_obj[I40IW_HMC_IW_ARP].max_cnt;
+ hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
+ hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
+
+ hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted;
+ hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted;
+ hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
+ hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
+ hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
+ hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+ hmc_info->hmc_obj[I40IW_HMC_IW_TIMER].cnt =
+ ((qpwanted) / 512 + 1) * hmc_fpm_misc->timer_bucket;
+ hmc_info->hmc_obj[I40IW_HMC_IW_FSIMC].cnt = 0x00;
+ hmc_info->hmc_obj[I40IW_HMC_IW_FSIAV].cnt = 0x00;
+ hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
+
+ /* How much memory is needed for all the objects. */
+ bytes_needed = 0;
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
+ bytes_needed +=
+ (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
+ sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;
+ if ((loop_count > 1000) ||
+ ((!(loop_count % 10)) &&
+ (qpwanted > qpwantedoriginal * 2 / 3))) {
+ if (qpwanted > FPM_MULTIPLIER) {
+ qpwanted -= FPM_MULTIPLIER;
+ powerof2 = 1;
+ while (powerof2 < qpwanted)
+ powerof2 *= 2;
+ powerof2 /= 2;
+ qpwanted = powerof2;
+ } else {
+ qpwanted /= 2;
+ }
+ }
+ if (mrwanted > FPM_MULTIPLIER * 10)
+ mrwanted -= FPM_MULTIPLIER * 10;
+ if (pblewanted > FPM_MULTIPLIER * 1000)
+ pblewanted -= FPM_MULTIPLIER * 1000;
+ } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
+
+ bytes_needed = 0;
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+ bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n",
+ __func__, i, hmc_info->hmc_obj[i].cnt,
+ hmc_info->hmc_obj[i].size);
+ }
+ sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up not truncate. */
+
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
+ loop_count, sd_needed,
+ hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt,
+ hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt,
+ hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt,
+ hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt);
+
+ ret_code = i40iw_sc_configure_iw_fpm(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "configure_iw_fpm returned error_code[x%08X]\n",
+ i40iw_rd32(dev->hw, dev->is_pf ? I40E_PFPE_CQPERRCODES : I40E_VFPE_CQPERRCODES1));
+ return ret_code;
+ }
+
+ hmc_info->sd_table.sd_cnt = (u32)sd_needed;
+
+ mem_size = sizeof(struct i40iw_hmc_sd_entry) *
+ (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
+ ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
+ if (ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s: failed to allocate memory for sd_entry buffer\n",
+ __func__);
+ return ret_code;
+ }
+ hmc_info->sd_table.sd_entry = virt_mem.va;
+
+ return ret_code;
+}
+
+/**
+ * i40iw_exec_cqp_cmd - execute cqp cmd when wqe are available
+ * @dev: rdma device
+ * @pcmdinfo: cqp command info
+ */
+static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
+ struct cqp_commands_info *pcmdinfo)
+{
+ enum i40iw_status_code status;
+ struct i40iw_dma_mem values_mem;
+
+ dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
+ switch (pcmdinfo->cqp_cmd) {
+ case OP_DELETE_LOCAL_MAC_IPADDR_ENTRY:
+ status = i40iw_sc_del_local_mac_ipaddr_entry(
+ pcmdinfo->in.u.del_local_mac_ipaddr_entry.cqp,
+ pcmdinfo->in.u.del_local_mac_ipaddr_entry.scratch,
+ pcmdinfo->in.u.del_local_mac_ipaddr_entry.entry_idx,
+ pcmdinfo->in.u.del_local_mac_ipaddr_entry.ignore_ref_count,
+ pcmdinfo->post_sq);
+ break;
+ case OP_CEQ_DESTROY:
+ status = i40iw_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
+ pcmdinfo->in.u.ceq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_AEQ_DESTROY:
+ status = i40iw_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
+ pcmdinfo->in.u.aeq_destroy.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_DELETE_ARP_CACHE_ENTRY:
+ status = i40iw_sc_del_arp_cache_entry(
+ pcmdinfo->in.u.del_arp_cache_entry.cqp,
+ pcmdinfo->in.u.del_arp_cache_entry.scratch,
+ pcmdinfo->in.u.del_arp_cache_entry.arp_index,
+ pcmdinfo->post_sq);
+ break;
+ case OP_MANAGE_APBVT_ENTRY:
+ status = i40iw_sc_manage_apbvt_entry(
+ pcmdinfo->in.u.manage_apbvt_entry.cqp,
+ &pcmdinfo->in.u.manage_apbvt_entry.info,
+ pcmdinfo->in.u.manage_apbvt_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_CEQ_CREATE:
+ status = i40iw_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
+ pcmdinfo->in.u.ceq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_AEQ_CREATE:
+ status = i40iw_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
+ pcmdinfo->in.u.aeq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY:
+ status = i40iw_sc_alloc_local_mac_ipaddr_entry(
+ pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.cqp,
+ pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_ADD_LOCAL_MAC_IPADDR_ENTRY:
+ status = i40iw_sc_add_local_mac_ipaddr_entry(
+ pcmdinfo->in.u.add_local_mac_ipaddr_entry.cqp,
+ &pcmdinfo->in.u.add_local_mac_ipaddr_entry.info,
+ pcmdinfo->in.u.add_local_mac_ipaddr_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_MANAGE_QHASH_TABLE_ENTRY:
+ status = i40iw_sc_manage_qhash_table_entry(
+ pcmdinfo->in.u.manage_qhash_table_entry.cqp,
+ &pcmdinfo->in.u.manage_qhash_table_entry.info,
+ pcmdinfo->in.u.manage_qhash_table_entry.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_QP_MODIFY:
+ status = i40iw_sc_qp_modify(
+ pcmdinfo->in.u.qp_modify.qp,
+ &pcmdinfo->in.u.qp_modify.info,
+ pcmdinfo->in.u.qp_modify.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_QP_UPLOAD_CONTEXT:
+ status = i40iw_sc_qp_upload_context(
+ pcmdinfo->in.u.qp_upload_context.dev,
+ &pcmdinfo->in.u.qp_upload_context.info,
+ pcmdinfo->in.u.qp_upload_context.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_CQ_CREATE:
+ status = i40iw_sc_cq_create(
+ pcmdinfo->in.u.cq_create.cq,
+ pcmdinfo->in.u.cq_create.scratch,
+ pcmdinfo->in.u.cq_create.check_overflow,
+ pcmdinfo->post_sq);
+ break;
+ case OP_CQ_DESTROY:
+ status = i40iw_sc_cq_destroy(
+ pcmdinfo->in.u.cq_destroy.cq,
+ pcmdinfo->in.u.cq_destroy.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_QP_CREATE:
+ status = i40iw_sc_qp_create(
+ pcmdinfo->in.u.qp_create.qp,
+ &pcmdinfo->in.u.qp_create.info,
+ pcmdinfo->in.u.qp_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_QP_DESTROY:
+ status = i40iw_sc_qp_destroy(
+ pcmdinfo->in.u.qp_destroy.qp,
+ pcmdinfo->in.u.qp_destroy.scratch,
+ pcmdinfo->in.u.qp_destroy.remove_hash_idx,
+ pcmdinfo->in.u.qp_destroy.
+ ignore_mw_bnd,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_ALLOC_STAG:
+ status = i40iw_sc_alloc_stag(
+ pcmdinfo->in.u.alloc_stag.dev,
+ &pcmdinfo->in.u.alloc_stag.info,
+ pcmdinfo->in.u.alloc_stag.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_MR_REG_NON_SHARED:
+ status = i40iw_sc_mr_reg_non_shared(
+ pcmdinfo->in.u.mr_reg_non_shared.dev,
+ &pcmdinfo->in.u.mr_reg_non_shared.info,
+ pcmdinfo->in.u.mr_reg_non_shared.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_DEALLOC_STAG:
+ status = i40iw_sc_dealloc_stag(
+ pcmdinfo->in.u.dealloc_stag.dev,
+ &pcmdinfo->in.u.dealloc_stag.info,
+ pcmdinfo->in.u.dealloc_stag.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_MW_ALLOC:
+ status = i40iw_sc_mw_alloc(
+ pcmdinfo->in.u.mw_alloc.dev,
+ pcmdinfo->in.u.mw_alloc.scratch,
+ pcmdinfo->in.u.mw_alloc.mw_stag_index,
+ pcmdinfo->in.u.mw_alloc.pd_id,
+ pcmdinfo->post_sq);
+
+ break;
+ case OP_QP_FLUSH_WQES:
+ status = i40iw_sc_qp_flush_wqes(
+ pcmdinfo->in.u.qp_flush_wqes.qp,
+ &pcmdinfo->in.u.qp_flush_wqes.info,
+ pcmdinfo->in.u.qp_flush_wqes.
+ scratch, pcmdinfo->post_sq);
+ break;
+ case OP_ADD_ARP_CACHE_ENTRY:
+ status = i40iw_sc_add_arp_cache_entry(
+ pcmdinfo->in.u.add_arp_cache_entry.cqp,
+ &pcmdinfo->in.u.add_arp_cache_entry.info,
+ pcmdinfo->in.u.add_arp_cache_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_MANAGE_PUSH_PAGE:
+ status = i40iw_sc_manage_push_page(
+ pcmdinfo->in.u.manage_push_page.cqp,
+ &pcmdinfo->in.u.manage_push_page.info,
+ pcmdinfo->in.u.manage_push_page.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case OP_UPDATE_PE_SDS:
+ /* case I40IW_CQP_OP_UPDATE_PE_SDS */
+ status = i40iw_update_pe_sds(
+ pcmdinfo->in.u.update_pe_sds.dev,
+ &pcmdinfo->in.u.update_pe_sds.info,
+ pcmdinfo->in.u.update_pe_sds.
+ scratch);
+
+ break;
+ case OP_MANAGE_HMC_PM_FUNC_TABLE:
+ status = i40iw_sc_manage_hmc_pm_func_table(
+ pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
+ pcmdinfo->in.u.manage_hmc_pm.scratch,
+ (u8)pcmdinfo->in.u.manage_hmc_pm.info.vf_id,
+ pcmdinfo->in.u.manage_hmc_pm.info.free_fcn,
+ true);
+ break;
+ case OP_SUSPEND:
+ status = i40iw_sc_suspend_qp(
+ pcmdinfo->in.u.suspend_resume.cqp,
+ pcmdinfo->in.u.suspend_resume.qp,
+ pcmdinfo->in.u.suspend_resume.scratch);
+ break;
+ case OP_RESUME:
+ status = i40iw_sc_resume_qp(
+ pcmdinfo->in.u.suspend_resume.cqp,
+ pcmdinfo->in.u.suspend_resume.qp,
+ pcmdinfo->in.u.suspend_resume.scratch);
+ break;
+ case OP_MANAGE_VF_PBLE_BP:
+ status = i40iw_manage_vf_pble_bp(
+ pcmdinfo->in.u.manage_vf_pble_bp.cqp,
+ &pcmdinfo->in.u.manage_vf_pble_bp.info,
+ pcmdinfo->in.u.manage_vf_pble_bp.scratch, true);
+ break;
+ case OP_QUERY_FPM_VALUES:
+ values_mem.pa = pcmdinfo->in.u.query_fpm_values.fpm_values_pa;
+ values_mem.va = pcmdinfo->in.u.query_fpm_values.fpm_values_va;
+ status = i40iw_sc_query_fpm_values(
+ pcmdinfo->in.u.query_fpm_values.cqp,
+ pcmdinfo->in.u.query_fpm_values.scratch,
+ pcmdinfo->in.u.query_fpm_values.hmc_fn_id,
+ &values_mem, true, I40IW_CQP_WAIT_EVENT);
+ break;
+ case OP_COMMIT_FPM_VALUES:
+ values_mem.pa = pcmdinfo->in.u.commit_fpm_values.fpm_values_pa;
+ values_mem.va = pcmdinfo->in.u.commit_fpm_values.fpm_values_va;
+ status = i40iw_sc_commit_fpm_values(
+ pcmdinfo->in.u.commit_fpm_values.cqp,
+ pcmdinfo->in.u.commit_fpm_values.scratch,
+ pcmdinfo->in.u.commit_fpm_values.hmc_fn_id,
+ &values_mem,
+ true,
+ I40IW_CQP_WAIT_EVENT);
+ break;
+ default:
+ status = I40IW_NOT_SUPPORTED;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * i40iw_process_cqp_cmd - process all cqp commands
+ * @dev: sc device struct
+ * @pcmdinfo: cqp command info
+ */
+enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
+ struct cqp_commands_info *pcmdinfo)
+{
+ enum i40iw_status_code status = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cqp_lock, flags);
+ if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
+ status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
+ else
+ list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
+ spin_unlock_irqrestore(&dev->cqp_lock, flags);
+ return status;
+}
+
+/**
+ * i40iw_process_bh - called from tasklet for cqp list
+ * @dev: sc device struct
+ */
+enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
+{
+ enum i40iw_status_code status = 0;
+ struct cqp_commands_info *pcmdinfo;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cqp_lock, flags);
+ while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
+ pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
+
+ status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
+ if (status)
+ break;
+ }
+ spin_unlock_irqrestore(&dev->cqp_lock, flags);
+ return status;
+}
+
+/**
+ * i40iw_iwarp_opcode - determine if incoming is rdma layer
+ * @info: aeq info for the packet
+ * @pkt: packet for error
+ */
+static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
+{
+ u16 *mpa;
+ u32 opcode = 0xffffffff;
+
+ if (info->q2_data_written) {
+ mpa = (u16 *)pkt;
+ opcode = ntohs(mpa[1]) & 0xf;
+ }
+ return opcode;
+}
+
+/**
+ * i40iw_locate_mpa - return pointer to mpa in the pkt
+ * @pkt: packet with data
+ */
+static u8 *i40iw_locate_mpa(u8 *pkt)
+{
+ /* skip over ethernet header */
+ pkt += I40IW_MAC_HLEN;
+
+ /* Skip over IP and TCP headers */
+ pkt += 4 * (pkt[0] & 0x0f);
+ pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+ return pkt;
+}
+
+/**
+ * i40iw_setup_termhdr - termhdr for terminate pkt
+ * @qp: sc qp ptr for pkt
+ * @hdr: term hdr
+ * @opcode: flush opcode for termhdr
+ * @layer_etype: error layer + error type
+ * @err: error cod ein the header
+ */
+static void i40iw_setup_termhdr(struct i40iw_sc_qp *qp,
+ struct i40iw_terminate_hdr *hdr,
+ enum i40iw_flush_opcode opcode,
+ u8 layer_etype,
+ u8 err)
+{
+ qp->flush_code = opcode;
+ hdr->layer_etype = layer_etype;
+ hdr->error_code = err;
+}
+
+/**
+ * i40iw_bld_terminate_hdr - build terminate message header
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
+ struct i40iw_aeqe_info *info)
+{
+ u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+ u16 ddp_seg_len;
+ int copy_len = 0;
+ u8 is_tagged = 0;
+ enum i40iw_flush_opcode flush_code = FLUSH_INVALID;
+ u32 opcode;
+ struct i40iw_terminate_hdr *termhdr;
+
+ termhdr = (struct i40iw_terminate_hdr *)qp->q2_buf;
+ memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
+
+ if (info->q2_data_written) {
+ /* Use data from offending packet to fill in ddp & rdma hdrs */
+ pkt = i40iw_locate_mpa(pkt);
+ ddp_seg_len = ntohs(*(u16 *)pkt);
+ if (ddp_seg_len) {
+ copy_len = 2;
+ termhdr->hdrct = DDP_LEN_FLAG;
+ if (pkt[2] & 0x80) {
+ is_tagged = 1;
+ if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+ copy_len += TERM_DDP_LEN_TAGGED;
+ termhdr->hdrct |= DDP_HDR_FLAG;
+ }
+ } else {
+ if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+ copy_len += TERM_DDP_LEN_UNTAGGED;
+ termhdr->hdrct |= DDP_HDR_FLAG;
+ }
+
+ if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+ if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+ copy_len += TERM_RDMA_LEN;
+ termhdr->hdrct |= RDMA_HDR_FLAG;
+ }
+ }
+ }
+ }
+ }
+
+ opcode = i40iw_iwarp_opcode(info, pkt);
+
+ switch (info->ae_id) {
+ case I40IW_AE_AMP_UNALLOCATED_STAG:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ if (opcode == I40IW_OP_TYPE_RDMA_WRITE)
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_STAG);
+ else
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
+ break;
+ case I40IW_AE_AMP_BOUNDS_VIOLATION:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ if (info->q2_data_written)
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_BOUNDS);
+ else
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_BOUNDS);
+ break;
+ case I40IW_AE_AMP_BAD_PD:
+ switch (opcode) {
+ case I40IW_OP_TYPE_RDMA_WRITE:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_UNASSOC_STAG);
+ break;
+ case I40IW_OP_TYPE_SEND_INV:
+ case I40IW_OP_TYPE_SEND_SOL_INV:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_CANT_INV_STAG);
+ break;
+ default:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_UNASSOC_STAG);
+ }
+ break;
+ case I40IW_AE_AMP_INVALID_STAG:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
+ break;
+ case I40IW_AE_AMP_BAD_QP:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
+ break;
+ case I40IW_AE_AMP_BAD_STAG_KEY:
+ case I40IW_AE_AMP_BAD_STAG_INDEX:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ switch (opcode) {
+ case I40IW_OP_TYPE_SEND_INV:
+ case I40IW_OP_TYPE_SEND_SOL_INV:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_CANT_INV_STAG);
+ break;
+ default:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_STAG);
+ }
+ break;
+ case I40IW_AE_AMP_RIGHTS_VIOLATION:
+ case I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case I40IW_AE_PRIV_OPERATION_DENIED:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_ACCESS);
+ break;
+ case I40IW_AE_AMP_TO_WRAP:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP);
+ break;
+ case I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_MPA << 4) | DDP_LLP, MPA_MARKER);
+ break;
+ case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
+ break;
+ case I40IW_AE_LLP_SEGMENT_TOO_LARGE:
+ case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
+ break;
+ case I40IW_AE_LCE_QP_CATASTROPHIC:
+ case I40IW_AE_DDP_NO_L_BIT:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
+ (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
+ break;
+ case I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN:
+ case I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_RANGE);
+ break;
+ case I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_TOO_LONG);
+ break;
+ case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
+ if (is_tagged)
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_DDP_VER);
+ else
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_DDP_VER);
+ break;
+ case I40IW_AE_DDP_UBE_INVALID_MO:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MO);
+ break;
+ case I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_NO_BUF);
+ break;
+ case I40IW_AE_DDP_UBE_INVALID_QN:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
+ break;
+ case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_RDMAP_VER);
+ break;
+ case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNEXPECTED_OP);
+ break;
+ default:
+ i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNSPECIFIED);
+ break;
+ }
+
+ if (copy_len)
+ memcpy(termhdr + 1, pkt, copy_len);
+
+ if (flush_code && !info->in_rdrsp_wr)
+ qp->sq_flush = (info->sq) ? true : false;
+
+ return sizeof(struct i40iw_terminate_hdr) + copy_len;
+}
+
+/**
+ * i40iw_terminate_send_fin() - Send fin for terminate message
+ * @qp: qp associated with received terminate AE
+ */
+void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp)
+{
+ /* Send the fin only */
+ i40iw_term_modify_qp(qp,
+ I40IW_QP_STATE_TERMINATE,
+ I40IWQP_TERM_SEND_FIN_ONLY,
+ 0);
+}
+
+/**
+ * i40iw_terminate_connection() - Bad AE and send terminate to remote QP
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
+{
+ u8 termlen = 0;
+
+ if (qp->term_flags & I40IW_TERM_SENT)
+ return; /* Sanity check */
+
+ /* Eventtype can change from bld_terminate_hdr */
+ qp->eventtype = TERM_EVENT_QP_FATAL;
+ termlen = i40iw_bld_terminate_hdr(qp, info);
+ i40iw_terminate_start_timer(qp);
+ qp->term_flags |= I40IW_TERM_SENT;
+ i40iw_term_modify_qp(qp, I40IW_QP_STATE_TERMINATE,
+ I40IWQP_TERM_SEND_TERM_ONLY, termlen);
+}
+
+/**
+ * i40iw_terminate_received - handle terminate received AE
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
+{
+ u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+ u32 *mpa;
+ u8 ddp_ctl;
+ u8 rdma_ctl;
+ u16 aeq_id = 0;
+ struct i40iw_terminate_hdr *termhdr;
+
+ mpa = (u32 *)i40iw_locate_mpa(pkt);
+ if (info->q2_data_written) {
+ /* did not validate the frame - do it now */
+ ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
+ rdma_ctl = ntohl(mpa[0]) & 0xff;
+ if ((ddp_ctl & 0xc0) != 0x40)
+ aeq_id = I40IW_AE_LCE_QP_CATASTROPHIC;
+ else if ((ddp_ctl & 0x03) != 1)
+ aeq_id = I40IW_AE_DDP_UBE_INVALID_DDP_VERSION;
+ else if (ntohl(mpa[2]) != 2)
+ aeq_id = I40IW_AE_DDP_UBE_INVALID_QN;
+ else if (ntohl(mpa[3]) != 1)
+ aeq_id = I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN;
+ else if (ntohl(mpa[4]) != 0)
+ aeq_id = I40IW_AE_DDP_UBE_INVALID_MO;
+ else if ((rdma_ctl & 0xc0) != 0x40)
+ aeq_id = I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+ info->ae_id = aeq_id;
+ if (info->ae_id) {
+ /* Bad terminate recvd - send back a terminate */
+ i40iw_terminate_connection(qp, info);
+ return;
+ }
+ }
+
+ qp->term_flags |= I40IW_TERM_RCVD;
+ qp->eventtype = TERM_EVENT_QP_FATAL;
+ termhdr = (struct i40iw_terminate_hdr *)&mpa[5];
+ if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
+ termhdr->layer_etype == RDMAP_REMOTE_OP) {
+ i40iw_terminate_done(qp, 0);
+ } else {
+ i40iw_terminate_start_timer(qp);
+ i40iw_terminate_send_fin(qp);
+ }
+}
+
+/**
+ * i40iw_hw_stat_init - Initiliaze HW stats table
+ * @devstat: pestat struct
+ * @fcn_idx: PCI fn id
+ * @hw: PF i40iw_hw structure.
+ * @is_pf: Is it a PF?
+ *
+ * Populate the HW stat table with register offset addr for each
+ * stat. And start the perioidic stats timer.
+ */
+static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat,
+ u8 fcn_idx,
+ struct i40iw_hw *hw, bool is_pf)
+{
+ u32 stat_reg_offset;
+ u32 stat_index;
+ struct i40iw_dev_hw_stat_offsets *stat_table =
+ &devstat->hw_stat_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+
+ devstat->hw = hw;
+
+ if (is_pf) {
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+ I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+ I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+ I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+ I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+ I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+ I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+ I40E_GLPES_PFTCPRTXSEG(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+ I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+ I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
+
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+ I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+ I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+ I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+ I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+ I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+ I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+ I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+ I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+ I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+ I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+ I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+ I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+ I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+ I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+ I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+ I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+ I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+ I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+ I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+ I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+ I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+ I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+ I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+ I40E_GLPES_PFRDMAVINVLO(fcn_idx);
+ } else {
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+ I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+ I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+ I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+ I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+ I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+ I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+ I40E_GLPES_VFTCPRTXSEG(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+ I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
+ stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+ I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
+
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+ I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+ I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+ I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+ I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+ I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+ I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+ I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+ I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+ I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+ I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+ I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+ I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+ I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+ I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+ I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+ I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+ I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+ I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+ I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+ I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+ I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+ I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+ I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
+ stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+ I40E_GLPES_VFRDMAVINVLO(fcn_idx);
+ }
+
+ for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stat_index++) {
+ stat_reg_offset = stat_table->stat_offset_64[stat_index];
+ last_rd_stats->stat_value_64[stat_index] =
+ readq(devstat->hw->hw_addr + stat_reg_offset);
+ }
+
+ for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stat_index++) {
+ stat_reg_offset = stat_table->stat_offset_32[stat_index];
+ last_rd_stats->stat_value_32[stat_index] =
+ i40iw_rd32(devstat->hw, stat_reg_offset);
+ }
+}
+
+/**
+ * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs.
+ * @devstat: pestat struct
+ * @index: index in HW stat table which contains offset reg-addr
+ * @value: hw stat value
+ */
+static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat,
+ enum i40iw_hw_stat_index_32b index,
+ u64 *value)
+{
+ struct i40iw_dev_hw_stat_offsets *stat_table =
+ &devstat->hw_stat_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+ struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+ u64 new_stat_value = 0;
+ u32 stat_reg_offset = stat_table->stat_offset_32[index];
+
+ new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset);
+ /*roll-over case */
+ if (new_stat_value < last_rd_stats->stat_value_32[index])
+ hw_stats->stat_value_32[index] += new_stat_value;
+ else
+ hw_stats->stat_value_32[index] +=
+ new_stat_value - last_rd_stats->stat_value_32[index];
+ last_rd_stats->stat_value_32[index] = new_stat_value;
+ *value = hw_stats->stat_value_32[index];
+}
+
+/**
+ * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs.
+ * @devstat: pestat struct
+ * @index: index in HW stat table which contains offset reg-addr
+ * @value: hw stat value
+ */
+static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat,
+ enum i40iw_hw_stat_index_64b index,
+ u64 *value)
+{
+ struct i40iw_dev_hw_stat_offsets *stat_table =
+ &devstat->hw_stat_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+ struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+ u64 new_stat_value = 0;
+ u32 stat_reg_offset = stat_table->stat_offset_64[index];
+
+ new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset);
+ /*roll-over case */
+ if (new_stat_value < last_rd_stats->stat_value_64[index])
+ hw_stats->stat_value_64[index] += new_stat_value;
+ else
+ hw_stats->stat_value_64[index] +=
+ new_stat_value - last_rd_stats->stat_value_64[index];
+ last_rd_stats->stat_value_64[index] = new_stat_value;
+ *value = hw_stats->stat_value_64[index];
+}
+
+/**
+ * i40iw_hw_stat_read_all - read all HW stat counters
+ * @devstat: pestat struct
+ * @stat_values: hw stats structure
+ *
+ * Read all the HW stat counters and populates hw_stats structure
+ * of passed-in dev's pestat as well as copy created in stat_values.
+ */
+static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat,
+ struct i40iw_dev_hw_stats *stat_values)
+{
+ u32 stat_index;
+
+ for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stat_index++)
+ i40iw_hw_stat_read_32(devstat, stat_index,
+ &stat_values->stat_value_32[stat_index]);
+ for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stat_index++)
+ i40iw_hw_stat_read_64(devstat, stat_index,
+ &stat_values->stat_value_64[stat_index]);
+}
+
+/**
+ * i40iw_hw_stat_refresh_all - Update all HW stat structs
+ * @devstat: pestat struct
+ * @stat_values: hw stats structure
+ *
+ * Read all the HW stat counters to refresh values in hw_stats structure
+ * of passed-in dev's pestat
+ */
+static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat)
+{
+ u64 stat_value;
+ u32 stat_index;
+
+ for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stat_index++)
+ i40iw_hw_stat_read_32(devstat, stat_index, &stat_value);
+ for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stat_index++)
+ i40iw_hw_stat_read_64(devstat, stat_index, &stat_value);
+}
+
+static struct i40iw_cqp_ops iw_cqp_ops = {
+ i40iw_sc_cqp_init,
+ i40iw_sc_cqp_create,
+ i40iw_sc_cqp_post_sq,
+ i40iw_sc_cqp_get_next_send_wqe,
+ i40iw_sc_cqp_destroy,
+ i40iw_sc_poll_for_cqp_op_done
+};
+
+static struct i40iw_ccq_ops iw_ccq_ops = {
+ i40iw_sc_ccq_init,
+ i40iw_sc_ccq_create,
+ i40iw_sc_ccq_destroy,
+ i40iw_sc_ccq_create_done,
+ i40iw_sc_ccq_get_cqe_info,
+ i40iw_sc_ccq_arm
+};
+
+static struct i40iw_ceq_ops iw_ceq_ops = {
+ i40iw_sc_ceq_init,
+ i40iw_sc_ceq_create,
+ i40iw_sc_cceq_create_done,
+ i40iw_sc_cceq_destroy_done,
+ i40iw_sc_cceq_create,
+ i40iw_sc_ceq_destroy,
+ i40iw_sc_process_ceq
+};
+
+static struct i40iw_aeq_ops iw_aeq_ops = {
+ i40iw_sc_aeq_init,
+ i40iw_sc_aeq_create,
+ i40iw_sc_aeq_destroy,
+ i40iw_sc_get_next_aeqe,
+ i40iw_sc_repost_aeq_entries,
+ i40iw_sc_aeq_create_done,
+ i40iw_sc_aeq_destroy_done
+};
+
+/* iwarp pd ops */
+static struct i40iw_pd_ops iw_pd_ops = {
+ i40iw_sc_pd_init,
+};
+
+static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
+ i40iw_sc_qp_init,
+ i40iw_sc_qp_create,
+ i40iw_sc_qp_modify,
+ i40iw_sc_qp_destroy,
+ i40iw_sc_qp_flush_wqes,
+ i40iw_sc_qp_upload_context,
+ i40iw_sc_qp_setctx,
+ i40iw_sc_send_lsmm,
+ i40iw_sc_send_lsmm_nostag,
+ i40iw_sc_send_rtt,
+ i40iw_sc_post_wqe0,
+};
+
+static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
+ i40iw_sc_cq_init,
+ i40iw_sc_cq_create,
+ i40iw_sc_cq_destroy,
+ i40iw_sc_cq_modify,
+};
+
+static struct i40iw_mr_ops iw_mr_ops = {
+ i40iw_sc_alloc_stag,
+ i40iw_sc_mr_reg_non_shared,
+ i40iw_sc_mr_reg_shared,
+ i40iw_sc_dealloc_stag,
+ i40iw_sc_query_stag,
+ i40iw_sc_mw_alloc
+};
+
+static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
+ i40iw_sc_manage_push_page,
+ i40iw_sc_manage_hmc_pm_func_table,
+ i40iw_sc_set_hmc_resource_profile,
+ i40iw_sc_commit_fpm_values,
+ i40iw_sc_query_fpm_values,
+ i40iw_sc_static_hmc_pages_allocated,
+ i40iw_sc_add_arp_cache_entry,
+ i40iw_sc_del_arp_cache_entry,
+ i40iw_sc_query_arp_cache_entry,
+ i40iw_sc_manage_apbvt_entry,
+ i40iw_sc_manage_qhash_table_entry,
+ i40iw_sc_alloc_local_mac_ipaddr_entry,
+ i40iw_sc_add_local_mac_ipaddr_entry,
+ i40iw_sc_del_local_mac_ipaddr_entry,
+ i40iw_sc_cqp_nop,
+ i40iw_sc_commit_fpm_values_done,
+ i40iw_sc_query_fpm_values_done,
+ i40iw_sc_manage_hmc_pm_func_table_done,
+ i40iw_sc_suspend_qp,
+ i40iw_sc_resume_qp
+};
+
+static struct i40iw_hmc_ops iw_hmc_ops = {
+ i40iw_sc_init_iw_hmc,
+ i40iw_sc_parse_fpm_query_buf,
+ i40iw_sc_configure_iw_fpm,
+ i40iw_sc_parse_fpm_commit_buf,
+ i40iw_sc_create_hmc_obj,
+ i40iw_sc_del_hmc_obj,
+ NULL,
+ NULL
+};
+
+static const struct i40iw_device_pestat_ops iw_device_pestat_ops = {
+ i40iw_hw_stat_init,
+ i40iw_hw_stat_read_32,
+ i40iw_hw_stat_read_64,
+ i40iw_hw_stat_read_all,
+ i40iw_hw_stat_refresh_all
+};
+
+/**
+ * i40iw_device_init_pestat - Initialize the pestat structure
+ * @dev: pestat struct
+ */
+enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat)
+{
+ devstat->ops = iw_device_pestat_ops;
+ return 0;
+}
+
+/**
+ * i40iw_device_init - Initialize IWARP device
+ * @dev: IWARP device pointer
+ * @info: IWARP init info
+ */
+enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
+ struct i40iw_device_init_info *info)
+{
+ u32 val;
+ u32 vchnl_ver = 0;
+ u16 hmc_fcn = 0;
+ enum i40iw_status_code ret_code = 0;
+ u8 db_size;
+
+ spin_lock_init(&dev->cqp_lock);
+ INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for the cqp commands backlog. */
+
+ i40iw_device_init_uk(&dev->dev_uk);
+
+ dev->debug_mask = info->debug_mask;
+
+ ret_code = i40iw_device_init_pestat(&dev->dev_pestat);
+ if (ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_DEV,
+ "%s: i40iw_device_init_pestat failed\n", __func__);
+ return ret_code;
+ }
+ dev->hmc_fn_id = info->hmc_fn_id;
+ dev->qs_handle = info->qs_handle;
+ dev->exception_lan_queue = info->exception_lan_queue;
+ dev->is_pf = info->is_pf;
+
+ dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
+ dev->fpm_query_buf = info->fpm_query_buf;
+
+ dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
+ dev->fpm_commit_buf = info->fpm_commit_buf;
+
+ dev->hw = info->hw;
+ dev->hw->hw_addr = info->bar0;
+
+ val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
+ dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
+
+ if (dev->is_pf) {
+ dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat,
+ dev->hmc_fn_id, dev->hw, true);
+ spin_lock_init(&dev->dev_pestat.stats_lock);
+ /*start the periodic stats_timer */
+ i40iw_hw_stats_start_timer(dev);
+ val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
+ db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
+ if ((db_size != I40IW_PE_DB_SIZE_4M) &&
+ (db_size != I40IW_PE_DB_SIZE_8M)) {
+ i40iw_debug(dev, I40IW_DEBUG_DEV,
+ "%s: PE doorbell is not enabled in CSR val 0x%x\n",
+ __func__, val);
+ ret_code = I40IW_ERR_PE_DOORBELL_NOT_ENABLED;
+ return ret_code;
+ }
+ dev->db_addr = dev->hw->hw_addr + I40IW_DB_ADDR_OFFSET;
+ dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_pf;
+ } else {
+ dev->db_addr = dev->hw->hw_addr + I40IW_VF_DB_ADDR_OFFSET;
+ }
+
+ dev->cqp_ops = &iw_cqp_ops;
+ dev->ccq_ops = &iw_ccq_ops;
+ dev->ceq_ops = &iw_ceq_ops;
+ dev->aeq_ops = &iw_aeq_ops;
+ dev->cqp_misc_ops = &iw_cqp_misc_ops;
+ dev->iw_pd_ops = &iw_pd_ops;
+ dev->iw_priv_qp_ops = &iw_priv_qp_ops;
+ dev->iw_priv_cq_ops = &iw_priv_cq_ops;
+ dev->mr_ops = &iw_mr_ops;
+ dev->hmc_ops = &iw_hmc_ops;
+ dev->vchnl_if.vchnl_send = info->vchnl_send;
+ if (dev->vchnl_if.vchnl_send)
+ dev->vchnl_up = true;
+ else
+ dev->vchnl_up = false;
+ if (!dev->is_pf) {
+ dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_vf;
+ ret_code = i40iw_vchnl_vf_get_ver(dev, &vchnl_ver);
+ if (!ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_DEV,
+ "%s: Get Channel version rc = 0x%0x, version is %u\n",
+ __func__, ret_code, vchnl_ver);
+ ret_code = i40iw_vchnl_vf_get_hmc_fcn(dev, &hmc_fcn);
+ if (!ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_DEV,
+ "%s Get HMC function rc = 0x%0x, hmc fcn is %u\n",
+ __func__, ret_code, hmc_fcn);
+ dev->hmc_fn_id = (u8)hmc_fcn;
+ }
+ }
+ }
+ dev->iw_vf_cqp_ops = &iw_vf_cqp_ops;
+
+ return ret_code;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
new file mode 100644
index 000000000000..aab88d65f805
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -0,0 +1,1713 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_D_H
+#define I40IW_D_H
+
+#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
+#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
+
+#define I40IW_PUSH_OFFSET (4 * 1024 * 1024)
+#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16
+#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024)
+#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2
+
+#define I40IW_PE_DB_SIZE_4M 1
+#define I40IW_PE_DB_SIZE_8M 2
+
+#define I40IW_DDP_VER 1
+#define I40IW_RDMAP_VER 1
+
+#define I40IW_RDMA_MODE_RDMAC 0
+#define I40IW_RDMA_MODE_IETF 1
+
+#define I40IW_QP_STATE_INVALID 0
+#define I40IW_QP_STATE_IDLE 1
+#define I40IW_QP_STATE_RTS 2
+#define I40IW_QP_STATE_CLOSING 3
+#define I40IW_QP_STATE_RESERVED 4
+#define I40IW_QP_STATE_TERMINATE 5
+#define I40IW_QP_STATE_ERROR 6
+
+#define I40IW_STAG_STATE_INVALID 0
+#define I40IW_STAG_STATE_VALID 1
+
+#define I40IW_STAG_TYPE_SHARED 0
+#define I40IW_STAG_TYPE_NONSHARED 1
+
+#define I40IW_MAX_USER_PRIORITY 8
+
+#define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits)
+#define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits)
+#define LS_32_1(val, bits) (u32)(val << bits)
+#define RS_32_1(val, bits) (u32)(val >> bits)
+#define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
+
+#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
+
+#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
+#define LS_32(val, field) ((val << field ## _SHIFT) & (field ## _MASK))
+#define RS_32(val, field) ((val & field ## _MASK) >> field ## _SHIFT)
+
+#define TERM_DDP_LEN_TAGGED 14
+#define TERM_DDP_LEN_UNTAGGED 18
+#define TERM_RDMA_LEN 28
+#define RDMA_OPCODE_MASK 0x0f
+#define RDMA_READ_REQ_OPCODE 1
+#define Q2_BAD_FRAME_OFFSET 72
+#define CQE_MAJOR_DRV 0x8000
+
+#define I40IW_TERM_SENT 0x01
+#define I40IW_TERM_RCVD 0x02
+#define I40IW_TERM_DONE 0x04
+#define I40IW_MAC_HLEN 14
+
+#define I40IW_INVALID_WQE_INDEX 0xffffffff
+
+#define I40IW_CQP_WAIT_POLL_REGS 1
+#define I40IW_CQP_WAIT_POLL_CQ 2
+#define I40IW_CQP_WAIT_EVENT 3
+
+#define I40IW_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
+#define I40IW_GET_CURRENT_CQ_ELEMENT(_cq) \
+ ( \
+ &((_cq)->cq_base[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
+ )
+#define I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(_cq) \
+ ( \
+ &(((struct i40iw_extended_cqe *) \
+ ((_cq)->cq_base))[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
+ )
+
+#define I40IW_GET_CURRENT_AEQ_ELEMENT(_aeq) \
+ ( \
+ &_aeq->aeqe_base[I40IW_RING_GETCURRENT_TAIL(_aeq->aeq_ring)] \
+ )
+
+#define I40IW_GET_CURRENT_CEQ_ELEMENT(_ceq) \
+ ( \
+ &_ceq->ceqe_base[I40IW_RING_GETCURRENT_TAIL(_ceq->ceq_ring)] \
+ )
+
+#define I40IW_AE_SOURCE_RQ 0x1
+#define I40IW_AE_SOURCE_RQ_0011 0x3
+
+#define I40IW_AE_SOURCE_CQ 0x2
+#define I40IW_AE_SOURCE_CQ_0110 0x6
+#define I40IW_AE_SOURCE_CQ_1010 0xA
+#define I40IW_AE_SOURCE_CQ_1110 0xE
+
+#define I40IW_AE_SOURCE_SQ 0x5
+#define I40IW_AE_SOURCE_SQ_0111 0x7
+
+#define I40IW_AE_SOURCE_IN_RR_WR 0x9
+#define I40IW_AE_SOURCE_IN_RR_WR_1011 0xB
+#define I40IW_AE_SOURCE_OUT_RR 0xD
+#define I40IW_AE_SOURCE_OUT_RR_1111 0xF
+
+#define I40IW_TCP_STATE_NON_EXISTENT 0
+#define I40IW_TCP_STATE_CLOSED 1
+#define I40IW_TCP_STATE_LISTEN 2
+#define I40IW_STATE_SYN_SEND 3
+#define I40IW_TCP_STATE_SYN_RECEIVED 4
+#define I40IW_TCP_STATE_ESTABLISHED 5
+#define I40IW_TCP_STATE_CLOSE_WAIT 6
+#define I40IW_TCP_STATE_FIN_WAIT_1 7
+#define I40IW_TCP_STATE_CLOSING 8
+#define I40IW_TCP_STATE_LAST_ACK 9
+#define I40IW_TCP_STATE_FIN_WAIT_2 10
+#define I40IW_TCP_STATE_TIME_WAIT 11
+#define I40IW_TCP_STATE_RESERVED_1 12
+#define I40IW_TCP_STATE_RESERVED_2 13
+#define I40IW_TCP_STATE_RESERVED_3 14
+#define I40IW_TCP_STATE_RESERVED_4 15
+
+/* ILQ CQP hash table fields */
+#define I40IW_CQPSQ_QHASH_VLANID_SHIFT 32
+#define I40IW_CQPSQ_QHASH_VLANID_MASK \
+ ((u64)0xfff << I40IW_CQPSQ_QHASH_VLANID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_QPN_SHIFT 32
+#define I40IW_CQPSQ_QHASH_QPN_MASK \
+ ((u64)0x3ffff << I40IW_CQPSQ_QHASH_QPN_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT 0
+#define I40IW_CQPSQ_QHASH_QS_HANDLE_MASK ((u64)0x3ff << I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT 16
+#define I40IW_CQPSQ_QHASH_SRC_PORT_MASK \
+ ((u64)0xffff << I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT 0
+#define I40IW_CQPSQ_QHASH_DEST_PORT_MASK \
+ ((u64)0xffff << I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR0_SHIFT 32
+#define I40IW_CQPSQ_QHASH_ADDR0_MASK \
+ ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR0_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR1_SHIFT 0
+#define I40IW_CQPSQ_QHASH_ADDR1_MASK \
+ ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR1_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR2_SHIFT 32
+#define I40IW_CQPSQ_QHASH_ADDR2_MASK \
+ ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR2_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR3_SHIFT 0
+#define I40IW_CQPSQ_QHASH_ADDR3_MASK \
+ ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR3_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_WQEVALID_SHIFT 63
+#define I40IW_CQPSQ_QHASH_WQEVALID_MASK \
+ ((u64)0x1 << I40IW_CQPSQ_QHASH_WQEVALID_SHIFT)
+#define I40IW_CQPSQ_QHASH_OPCODE_SHIFT 32
+#define I40IW_CQPSQ_QHASH_OPCODE_MASK \
+ ((u64)0x3f << I40IW_CQPSQ_QHASH_OPCODE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_MANAGE_SHIFT 61
+#define I40IW_CQPSQ_QHASH_MANAGE_MASK \
+ ((u64)0x3 << I40IW_CQPSQ_QHASH_MANAGE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT 60
+#define I40IW_CQPSQ_QHASH_IPV4VALID_MASK \
+ ((u64)0x1 << I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_VLANVALID_SHIFT 59
+#define I40IW_CQPSQ_QHASH_VLANVALID_MASK \
+ ((u64)0x1 << I40IW_CQPSQ_QHASH_VLANVALID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT 42
+#define I40IW_CQPSQ_QHASH_ENTRYTYPE_MASK \
+ ((u64)0x7 << I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT)
+/* CQP Host Context */
+#define I40IW_CQPHC_EN_DC_TCP_SHIFT 0
+#define I40IW_CQPHC_EN_DC_TCP_MASK (1UL << I40IW_CQPHC_EN_DC_TCP_SHIFT)
+
+#define I40IW_CQPHC_SQSIZE_SHIFT 8
+#define I40IW_CQPHC_SQSIZE_MASK (0xfUL << I40IW_CQPHC_SQSIZE_SHIFT)
+
+#define I40IW_CQPHC_DISABLE_PFPDUS_SHIFT 1
+#define I40IW_CQPHC_DISABLE_PFPDUS_MASK (0x1UL << I40IW_CQPHC_DISABLE_PFPDUS_SHIFT)
+
+#define I40IW_CQPHC_ENABLED_VFS_SHIFT 32
+#define I40IW_CQPHC_ENABLED_VFS_MASK (0x3fULL << I40IW_CQPHC_ENABLED_VFS_SHIFT)
+
+#define I40IW_CQPHC_HMC_PROFILE_SHIFT 0
+#define I40IW_CQPHC_HMC_PROFILE_MASK (0x7ULL << I40IW_CQPHC_HMC_PROFILE_SHIFT)
+
+#define I40IW_CQPHC_SVER_SHIFT 24
+#define I40IW_CQPHC_SVER_MASK (0xffUL << I40IW_CQPHC_SVER_SHIFT)
+
+#define I40IW_CQPHC_SQBASE_SHIFT 9
+#define I40IW_CQPHC_SQBASE_MASK \
+ (0xfffffffffffffeULL << I40IW_CQPHC_SQBASE_SHIFT)
+
+#define I40IW_CQPHC_QPCTX_SHIFT 0
+#define I40IW_CQPHC_QPCTX_MASK \
+ (0xffffffffffffffffULL << I40IW_CQPHC_QPCTX_SHIFT)
+#define I40IW_CQPHC_SVER 1
+
+#define I40IW_CQP_SW_SQSIZE_4 4
+#define I40IW_CQP_SW_SQSIZE_2048 2048
+
+/* iWARP QP Doorbell shadow area */
+#define I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT 0
+#define I40IW_QP_DBSA_HW_SQ_TAIL_MASK \
+ (0x3fffUL << I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT)
+
+/* Completion Queue Doorbell shadow area */
+#define I40IW_CQ_DBSA_CQEIDX_SHIFT 0
+#define I40IW_CQ_DBSA_CQEIDX_MASK (0xfffffUL << I40IW_CQ_DBSA_CQEIDX_SHIFT)
+
+#define I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT 0
+#define I40IW_CQ_DBSA_SW_CQ_SELECT_MASK \
+ (0x3fffUL << I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_NEXT_SHIFT 14
+#define I40IW_CQ_DBSA_ARM_NEXT_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT 15
+#define I40IW_CQ_DBSA_ARM_NEXT_SE_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT 16
+#define I40IW_CQ_DBSA_ARM_SEQ_NUM_MASK \
+ (0x3UL << I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT)
+
+/* CQP and iWARP Completion Queue */
+#define I40IW_CQ_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQ_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CCQ_OPRETVAL_SHIFT 0
+#define I40IW_CCQ_OPRETVAL_MASK (0xffffffffUL << I40IW_CCQ_OPRETVAL_SHIFT)
+
+#define I40IW_CQ_MINERR_SHIFT 0
+#define I40IW_CQ_MINERR_MASK (0xffffUL << I40IW_CQ_MINERR_SHIFT)
+
+#define I40IW_CQ_MAJERR_SHIFT 16
+#define I40IW_CQ_MAJERR_MASK (0xffffUL << I40IW_CQ_MAJERR_SHIFT)
+
+#define I40IW_CQ_WQEIDX_SHIFT 32
+#define I40IW_CQ_WQEIDX_MASK (0x3fffULL << I40IW_CQ_WQEIDX_SHIFT)
+
+#define I40IW_CQ_ERROR_SHIFT 55
+#define I40IW_CQ_ERROR_MASK (1ULL << I40IW_CQ_ERROR_SHIFT)
+
+#define I40IW_CQ_SQ_SHIFT 62
+#define I40IW_CQ_SQ_MASK (1ULL << I40IW_CQ_SQ_SHIFT)
+
+#define I40IW_CQ_VALID_SHIFT 63
+#define I40IW_CQ_VALID_MASK (1ULL << I40IW_CQ_VALID_SHIFT)
+
+#define I40IWCQ_PAYLDLEN_SHIFT 0
+#define I40IWCQ_PAYLDLEN_MASK (0xffffffffUL << I40IWCQ_PAYLDLEN_SHIFT)
+
+#define I40IWCQ_TCPSEQNUM_SHIFT 32
+#define I40IWCQ_TCPSEQNUM_MASK (0xffffffffULL << I40IWCQ_TCPSEQNUM_SHIFT)
+
+#define I40IWCQ_INVSTAG_SHIFT 0
+#define I40IWCQ_INVSTAG_MASK (0xffffffffUL << I40IWCQ_INVSTAG_SHIFT)
+
+#define I40IWCQ_QPID_SHIFT 32
+#define I40IWCQ_QPID_MASK (0x3ffffULL << I40IWCQ_QPID_SHIFT)
+
+#define I40IWCQ_PSHDROP_SHIFT 51
+#define I40IWCQ_PSHDROP_MASK (1ULL << I40IWCQ_PSHDROP_SHIFT)
+
+#define I40IWCQ_SRQ_SHIFT 52
+#define I40IWCQ_SRQ_MASK (1ULL << I40IWCQ_SRQ_SHIFT)
+
+#define I40IWCQ_STAG_SHIFT 53
+#define I40IWCQ_STAG_MASK (1ULL << I40IWCQ_STAG_SHIFT)
+
+#define I40IWCQ_SOEVENT_SHIFT 54
+#define I40IWCQ_SOEVENT_MASK (1ULL << I40IWCQ_SOEVENT_SHIFT)
+
+#define I40IWCQ_OP_SHIFT 56
+#define I40IWCQ_OP_MASK (0x3fULL << I40IWCQ_OP_SHIFT)
+
+/* CEQE format */
+#define I40IW_CEQE_CQCTX_SHIFT 0
+#define I40IW_CEQE_CQCTX_MASK \
+ (0x7fffffffffffffffULL << I40IW_CEQE_CQCTX_SHIFT)
+
+#define I40IW_CEQE_VALID_SHIFT 63
+#define I40IW_CEQE_VALID_MASK (1ULL << I40IW_CEQE_VALID_SHIFT)
+
+/* AEQE format */
+#define I40IW_AEQE_COMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_AEQE_COMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_AEQE_QPCQID_SHIFT 0
+#define I40IW_AEQE_QPCQID_MASK (0x3ffffUL << I40IW_AEQE_QPCQID_SHIFT)
+
+#define I40IW_AEQE_WQDESCIDX_SHIFT 18
+#define I40IW_AEQE_WQDESCIDX_MASK (0x3fffULL << I40IW_AEQE_WQDESCIDX_SHIFT)
+
+#define I40IW_AEQE_OVERFLOW_SHIFT 33
+#define I40IW_AEQE_OVERFLOW_MASK (1ULL << I40IW_AEQE_OVERFLOW_SHIFT)
+
+#define I40IW_AEQE_AECODE_SHIFT 34
+#define I40IW_AEQE_AECODE_MASK (0xffffULL << I40IW_AEQE_AECODE_SHIFT)
+
+#define I40IW_AEQE_AESRC_SHIFT 50
+#define I40IW_AEQE_AESRC_MASK (0xfULL << I40IW_AEQE_AESRC_SHIFT)
+
+#define I40IW_AEQE_IWSTATE_SHIFT 54
+#define I40IW_AEQE_IWSTATE_MASK (0x7ULL << I40IW_AEQE_IWSTATE_SHIFT)
+
+#define I40IW_AEQE_TCPSTATE_SHIFT 57
+#define I40IW_AEQE_TCPSTATE_MASK (0xfULL << I40IW_AEQE_TCPSTATE_SHIFT)
+
+#define I40IW_AEQE_Q2DATA_SHIFT 61
+#define I40IW_AEQE_Q2DATA_MASK (0x3ULL << I40IW_AEQE_Q2DATA_SHIFT)
+
+#define I40IW_AEQE_VALID_SHIFT 63
+#define I40IW_AEQE_VALID_MASK (1ULL << I40IW_AEQE_VALID_SHIFT)
+
+/* CQP SQ WQES */
+#define I40IW_QP_TYPE_IWARP 1
+#define I40IW_QP_TYPE_UDA 2
+#define I40IW_QP_TYPE_CQP 4
+
+#define I40IW_CQ_TYPE_IWARP 1
+#define I40IW_CQ_TYPE_ILQ 2
+#define I40IW_CQ_TYPE_IEQ 3
+#define I40IW_CQ_TYPE_CQP 4
+
+#define I40IWQP_TERM_SEND_TERM_AND_FIN 0
+#define I40IWQP_TERM_SEND_TERM_ONLY 1
+#define I40IWQP_TERM_SEND_FIN_ONLY 2
+#define I40IWQP_TERM_DONOT_SEND_TERM_OR_FIN 3
+
+#define I40IW_CQP_OP_CREATE_QP 0
+#define I40IW_CQP_OP_MODIFY_QP 0x1
+#define I40IW_CQP_OP_DESTROY_QP 0x02
+#define I40IW_CQP_OP_CREATE_CQ 0x03
+#define I40IW_CQP_OP_MODIFY_CQ 0x04
+#define I40IW_CQP_OP_DESTROY_CQ 0x05
+#define I40IW_CQP_OP_CREATE_SRQ 0x06
+#define I40IW_CQP_OP_MODIFY_SRQ 0x07
+#define I40IW_CQP_OP_DESTROY_SRQ 0x08
+#define I40IW_CQP_OP_ALLOC_STAG 0x09
+#define I40IW_CQP_OP_REG_MR 0x0a
+#define I40IW_CQP_OP_QUERY_STAG 0x0b
+#define I40IW_CQP_OP_REG_SMR 0x0c
+#define I40IW_CQP_OP_DEALLOC_STAG 0x0d
+#define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e
+#define I40IW_CQP_OP_MANAGE_ARP 0x0f
+#define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10
+#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11
+#define I40IW_CQP_OP_MANAGE_PE_TEAM 0x12
+#define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13
+#define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
+#define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15
+#define I40IW_CQP_OP_CREATE_CEQ 0x16
+#define I40IW_CQP_OP_DESTROY_CEQ 0x18
+#define I40IW_CQP_OP_CREATE_AEQ 0x19
+#define I40IW_CQP_OP_DESTROY_AEQ 0x1b
+#define I40IW_CQP_OP_CREATE_ADDR_VECT 0x1c
+#define I40IW_CQP_OP_MODIFY_ADDR_VECT 0x1d
+#define I40IW_CQP_OP_DESTROY_ADDR_VECT 0x1e
+#define I40IW_CQP_OP_UPDATE_PE_SDS 0x1f
+#define I40IW_CQP_OP_QUERY_FPM_VALUES 0x20
+#define I40IW_CQP_OP_COMMIT_FPM_VALUES 0x21
+#define I40IW_CQP_OP_FLUSH_WQES 0x22
+#define I40IW_CQP_OP_MANAGE_APBVT 0x23
+#define I40IW_CQP_OP_NOP 0x24
+#define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
+#define I40IW_CQP_OP_CREATE_UDA_MCAST_GROUP 0x26
+#define I40IW_CQP_OP_MODIFY_UDA_MCAST_GROUP 0x27
+#define I40IW_CQP_OP_DESTROY_UDA_MCAST_GROUP 0x28
+#define I40IW_CQP_OP_SUSPEND_QP 0x29
+#define I40IW_CQP_OP_RESUME_QP 0x2a
+#define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b
+#define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE 0x2d
+
+#define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16
+#define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT)
+
+#define I40IW_UDA_QPSQ_OPCODE_SHIFT 32
+#define I40IW_UDA_QPSQ_OPCODE_MASK ((u64)0x3f << I40IW_UDA_QPSQ_OPCODE_SHIFT)
+
+#define I40IW_UDA_QPSQ_MACLEN_SHIFT 56
+#define I40IW_UDA_QPSQ_MACLEN_MASK \
+ ((u64)0x7f << I40IW_UDA_QPSQ_MACLEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_IPLEN_SHIFT 48
+#define I40IW_UDA_QPSQ_IPLEN_MASK \
+ ((u64)0x7f << I40IW_UDA_QPSQ_IPLEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_L4T_SHIFT 30
+#define I40IW_UDA_QPSQ_L4T_MASK \
+ ((u64)0x3 << I40IW_UDA_QPSQ_L4T_SHIFT)
+
+#define I40IW_UDA_QPSQ_IIPT_SHIFT 28
+#define I40IW_UDA_QPSQ_IIPT_MASK \
+ ((u64)0x3 << I40IW_UDA_QPSQ_IIPT_SHIFT)
+
+#define I40IW_UDA_QPSQ_L4LEN_SHIFT 24
+#define I40IW_UDA_QPSQ_L4LEN_MASK ((u64)0xf << I40IW_UDA_QPSQ_L4LEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_AVIDX_SHIFT 0
+#define I40IW_UDA_QPSQ_AVIDX_MASK ((u64)0xffff << I40IW_UDA_QPSQ_AVIDX_SHIFT)
+
+#define I40IW_UDA_QPSQ_VALID_SHIFT 63
+#define I40IW_UDA_QPSQ_VALID_MASK \
+ ((u64)0x1 << I40IW_UDA_QPSQ_VALID_SHIFT)
+
+#define I40IW_UDA_QPSQ_SIGCOMPL_SHIFT 62
+#define I40IW_UDA_QPSQ_SIGCOMPL_MASK ((u64)0x1 << I40IW_UDA_QPSQ_SIGCOMPL_SHIFT)
+
+#define I40IW_UDA_PAYLOADLEN_SHIFT 0
+#define I40IW_UDA_PAYLOADLEN_MASK ((u64)0x3fff << I40IW_UDA_PAYLOADLEN_SHIFT)
+
+#define I40IW_UDA_HDRLEN_SHIFT 16
+#define I40IW_UDA_HDRLEN_MASK ((u64)0x1ff << I40IW_UDA_HDRLEN_SHIFT)
+
+#define I40IW_VLAN_TAG_VALID_SHIFT 50
+#define I40IW_VLAN_TAG_VALID_MASK ((u64)0x1 << I40IW_VLAN_TAG_VALID_SHIFT)
+
+#define I40IW_UDA_L3PROTO_SHIFT 0
+#define I40IW_UDA_L3PROTO_MASK ((u64)0x3 << I40IW_UDA_L3PROTO_SHIFT)
+
+#define I40IW_UDA_L4PROTO_SHIFT 16
+#define I40IW_UDA_L4PROTO_MASK ((u64)0x3 << I40IW_UDA_L4PROTO_SHIFT)
+
+#define I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT 44
+#define I40IW_UDA_QPSQ_DOLOOPBACK_MASK \
+ ((u64)0x1 << I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT)
+
+/* CQP SQ WQE common fields */
+#define I40IW_CQPSQ_OPCODE_SHIFT 32
+#define I40IW_CQPSQ_OPCODE_MASK (0x3fULL << I40IW_CQPSQ_OPCODE_SHIFT)
+
+#define I40IW_CQPSQ_WQEVALID_SHIFT 63
+#define I40IW_CQPSQ_WQEVALID_MASK (1ULL << I40IW_CQPSQ_WQEVALID_SHIFT)
+
+#define I40IW_CQPSQ_TPHVAL_SHIFT 0
+#define I40IW_CQPSQ_TPHVAL_MASK (0xffUL << I40IW_CQPSQ_TPHVAL_SHIFT)
+
+#define I40IW_CQPSQ_TPHEN_SHIFT 60
+#define I40IW_CQPSQ_TPHEN_MASK (1ULL << I40IW_CQPSQ_TPHEN_SHIFT)
+
+#define I40IW_CQPSQ_PBUFADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_PBUFADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Create/Modify/Destroy QP */
+
+#define I40IW_CQPSQ_QP_NEWMSS_SHIFT 32
+#define I40IW_CQPSQ_QP_NEWMSS_MASK (0x3fffULL << I40IW_CQPSQ_QP_NEWMSS_SHIFT)
+
+#define I40IW_CQPSQ_QP_TERMLEN_SHIFT 48
+#define I40IW_CQPSQ_QP_TERMLEN_MASK (0xfULL << I40IW_CQPSQ_QP_TERMLEN_SHIFT)
+
+#define I40IW_CQPSQ_QP_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_QP_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_QP_QPID_SHIFT 0
+#define I40IW_CQPSQ_QP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+#define I40IW_CQPSQ_QP_OP_SHIFT 32
+#define I40IW_CQPSQ_QP_OP_MASK I40IWCQ_OP_MASK
+
+#define I40IW_CQPSQ_QP_ORDVALID_SHIFT 42
+#define I40IW_CQPSQ_QP_ORDVALID_MASK (1ULL << I40IW_CQPSQ_QP_ORDVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_TOECTXVALID_SHIFT 43
+#define I40IW_CQPSQ_QP_TOECTXVALID_MASK \
+ (1ULL << I40IW_CQPSQ_QP_TOECTXVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT 44
+#define I40IW_CQPSQ_QP_CACHEDVARVALID_MASK \
+ (1ULL << I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_VQ_SHIFT 45
+#define I40IW_CQPSQ_QP_VQ_MASK (1ULL << I40IW_CQPSQ_QP_VQ_SHIFT)
+
+#define I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT 46
+#define I40IW_CQPSQ_QP_FORCELOOPBACK_MASK \
+ (1ULL << I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT)
+
+#define I40IW_CQPSQ_QP_CQNUMVALID_SHIFT 47
+#define I40IW_CQPSQ_QP_CQNUMVALID_MASK \
+ (1ULL << I40IW_CQPSQ_QP_CQNUMVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_QPTYPE_SHIFT 48
+#define I40IW_CQPSQ_QP_QPTYPE_MASK (0x3ULL << I40IW_CQPSQ_QP_QPTYPE_SHIFT)
+
+#define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52
+#define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT)
+
+#define I40IW_CQPSQ_QP_STATRSRC_SHIFT 53
+#define I40IW_CQPSQ_QP_STATRSRC_MASK (1ULL << I40IW_CQPSQ_QP_STATRSRC_SHIFT)
+
+#define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54
+#define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK \
+ (1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT)
+
+#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT 55
+#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_MASK \
+ (1ULL << I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT)
+
+#define I40IW_CQPSQ_QP_TERMACT_SHIFT 56
+#define I40IW_CQPSQ_QP_TERMACT_MASK (0x3ULL << I40IW_CQPSQ_QP_TERMACT_SHIFT)
+
+#define I40IW_CQPSQ_QP_RESETCON_SHIFT 58
+#define I40IW_CQPSQ_QP_RESETCON_MASK (1ULL << I40IW_CQPSQ_QP_RESETCON_SHIFT)
+
+#define I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT 59
+#define I40IW_CQPSQ_QP_ARPTABIDXVALID_MASK \
+ (1ULL << I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT 60
+#define I40IW_CQPSQ_QP_NEXTIWSTATE_MASK \
+ (0x7ULL << I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT)
+
+#define I40IW_CQPSQ_QP_DBSHADOWADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_QP_DBSHADOWADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Create/Modify/Destroy CQ */
+#define I40IW_CQPSQ_CQ_CQSIZE_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQSIZE_MASK (0x3ffffUL << I40IW_CQPSQ_CQ_CQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQCTX_MASK \
+ (0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQCTX_MASK \
+ (0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
+
+#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT 0
+#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_MASK \
+ (0x3ffff << I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CEQID_SHIFT 24
+#define I40IW_CQPSQ_CQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CQ_CEQID_SHIFT)
+
+#define I40IW_CQPSQ_CQ_OP_SHIFT 32
+#define I40IW_CQPSQ_CQ_OP_MASK (0x3fULL << I40IW_CQPSQ_CQ_OP_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQRESIZE_SHIFT 43
+#define I40IW_CQPSQ_CQ_CQRESIZE_MASK (1ULL << I40IW_CQPSQ_CQ_CQRESIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT 44
+#define I40IW_CQPSQ_CQ_LPBLSIZE_MASK (3ULL << I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT 46
+#define I40IW_CQPSQ_CQ_CHKOVERFLOW_MASK \
+ (1ULL << I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT)
+
+#define I40IW_CQPSQ_CQ_VIRTMAP_SHIFT 47
+#define I40IW_CQPSQ_CQ_VIRTMAP_MASK (1ULL << I40IW_CQPSQ_CQ_VIRTMAP_SHIFT)
+
+#define I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT 48
+#define I40IW_CQPSQ_CQ_ENCEQEMASK_MASK \
+ (1ULL << I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT 49
+#define I40IW_CQPSQ_CQ_CEQIDVALID_MASK \
+ (1ULL << I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT)
+
+#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT 61
+#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_MASK \
+ (1ULL << I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT)
+
+/* Create/Modify/Destroy Shared Receive Queue */
+
+#define I40IW_CQPSQ_SRQ_RQSIZE_SHIFT 0
+#define I40IW_CQPSQ_SRQ_RQSIZE_MASK (0xfUL << I40IW_CQPSQ_SRQ_RQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT 4
+#define I40IW_CQPSQ_SRQ_RQWQESIZE_MASK \
+ (0x7UL << I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT 32
+#define I40IW_CQPSQ_SRQ_SRQLIMIT_MASK \
+ (0xfffULL << I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_SRQ_SRQCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_SRQ_PDID_SHIFT 16
+#define I40IW_CQPSQ_SRQ_PDID_MASK \
+ (0x7fffULL << I40IW_CQPSQ_SRQ_PDID_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQID_SHIFT 0
+#define I40IW_CQPSQ_SRQ_SRQID_MASK (0x7fffUL << I40IW_CQPSQ_SRQ_SRQID_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_SRQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_SRQ_VIRTMAP_SHIFT I40IW_CQPSQ_CQ_VIRTMAP_SHIFT
+#define I40IW_CQPSQ_SRQ_VIRTMAP_MASK I40IW_CQPSQ_CQ_VIRTMAP_MASK
+
+#define I40IW_CQPSQ_SRQ_TPHEN_SHIFT I40IW_CQPSQ_TPHEN_SHIFT
+#define I40IW_CQPSQ_SRQ_TPHEN_MASK I40IW_CQPSQ_TPHEN_MASK
+
+#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT 61
+#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_MASK \
+ (1ULL << I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT 6
+#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_MASK \
+ (0x3ffffffffffffffULL << I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_MASK \
+ (0xfffffffUL << I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Allocate/Register/Register Shared/Deallocate Stag */
+#define I40IW_CQPSQ_STAG_VA_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_STAG_VA_FBO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_STAG_STAGLEN_SHIFT 0
+#define I40IW_CQPSQ_STAG_STAGLEN_MASK \
+ (0x3fffffffffffULL << I40IW_CQPSQ_STAG_STAGLEN_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PDID_SHIFT 48
+#define I40IW_CQPSQ_STAG_PDID_MASK (0x7fffULL << I40IW_CQPSQ_STAG_PDID_SHIFT)
+
+#define I40IW_CQPSQ_STAG_KEY_SHIFT 0
+#define I40IW_CQPSQ_STAG_KEY_MASK (0xffUL << I40IW_CQPSQ_STAG_KEY_SHIFT)
+
+#define I40IW_CQPSQ_STAG_IDX_SHIFT 8
+#define I40IW_CQPSQ_STAG_IDX_MASK (0xffffffUL << I40IW_CQPSQ_STAG_IDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT 32
+#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_MASK \
+ (0xffffffULL << I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_MR_SHIFT 43
+#define I40IW_CQPSQ_STAG_MR_MASK (1ULL << I40IW_CQPSQ_STAG_MR_SHIFT)
+
+#define I40IW_CQPSQ_STAG_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_STAG_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT 46
+#define I40IW_CQPSQ_STAG_HPAGESIZE_MASK \
+ (1ULL << I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT)
+
+#define I40IW_CQPSQ_STAG_ARIGHTS_SHIFT 48
+#define I40IW_CQPSQ_STAG_ARIGHTS_MASK \
+ (0x1fULL << I40IW_CQPSQ_STAG_ARIGHTS_SHIFT)
+
+#define I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT 53
+#define I40IW_CQPSQ_STAG_REMACCENABLED_MASK \
+ (1ULL << I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT)
+
+#define I40IW_CQPSQ_STAG_VABASEDTO_SHIFT 59
+#define I40IW_CQPSQ_STAG_VABASEDTO_MASK \
+ (1ULL << I40IW_CQPSQ_STAG_VABASEDTO_SHIFT)
+
+#define I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT 60
+#define I40IW_CQPSQ_STAG_USEHMCFNIDX_MASK \
+ (1ULL << I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_USEPFRID_SHIFT 61
+#define I40IW_CQPSQ_STAG_USEPFRID_MASK \
+ (1ULL << I40IW_CQPSQ_STAG_USEPFRID_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PBA_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_STAG_PBA_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT 0
+#define I40IW_CQPSQ_STAG_HMCFNIDX_MASK \
+ (0x3fUL << I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_MASK \
+ (0xfffffffUL << I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT)
+
+/* Query stag */
+#define I40IW_CQPSQ_QUERYSTAG_IDX_SHIFT I40IW_CQPSQ_STAG_IDX_SHIFT
+#define I40IW_CQPSQ_QUERYSTAG_IDX_MASK I40IW_CQPSQ_STAG_IDX_MASK
+
+/* Allocate Local IP Address Entry */
+
+/* Manage Local IP Address Table - MLIPA */
+#define I40IW_CQPSQ_MLIPA_IPV6LO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_MLIPA_IPV6LO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_MLIPA_IPV6HI_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_MLIPA_IPV6HI_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_MLIPA_IPV4_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_IPV4_MASK \
+ (0xffffffffUL << I40IW_CQPSQ_MLIPA_IPV4_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_MASK \
+ (0x3fUL << I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT 42
+#define I40IW_CQPSQ_MLIPA_IPV4VALID_MASK \
+ (1ULL << I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT 43
+#define I40IW_CQPSQ_MLIPA_IPV6VALID_MASK \
+ (1ULL << I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT 62
+#define I40IW_CQPSQ_MLIPA_FREEENTRY_MASK \
+ (1ULL << I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT 61
+#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_MASK \
+ (1ULL << I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC0_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_MAC0_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC0_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC1_SHIFT 8
+#define I40IW_CQPSQ_MLIPA_MAC1_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC1_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC2_SHIFT 16
+#define I40IW_CQPSQ_MLIPA_MAC2_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC2_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC3_SHIFT 24
+#define I40IW_CQPSQ_MLIPA_MAC3_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC3_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC4_SHIFT 32
+#define I40IW_CQPSQ_MLIPA_MAC4_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC4_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC5_SHIFT 40
+#define I40IW_CQPSQ_MLIPA_MAC5_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC5_SHIFT)
+
+/* Manage ARP Table - MAT */
+#define I40IW_CQPSQ_MAT_REACHMAX_SHIFT 0
+#define I40IW_CQPSQ_MAT_REACHMAX_MASK \
+ (0xffffffffUL << I40IW_CQPSQ_MAT_REACHMAX_SHIFT)
+
+#define I40IW_CQPSQ_MAT_MACADDR_SHIFT 0
+#define I40IW_CQPSQ_MAT_MACADDR_MASK \
+ (0xffffffffffffULL << I40IW_CQPSQ_MAT_MACADDR_SHIFT)
+
+#define I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT 0
+#define I40IW_CQPSQ_MAT_ARPENTRYIDX_MASK \
+ (0xfffUL << I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT)
+
+#define I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT 42
+#define I40IW_CQPSQ_MAT_ENTRYVALID_MASK \
+ (1ULL << I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT)
+
+#define I40IW_CQPSQ_MAT_PERMANENT_SHIFT 43
+#define I40IW_CQPSQ_MAT_PERMANENT_MASK \
+ (1ULL << I40IW_CQPSQ_MAT_PERMANENT_SHIFT)
+
+#define I40IW_CQPSQ_MAT_QUERY_SHIFT 44
+#define I40IW_CQPSQ_MAT_QUERY_MASK (1ULL << I40IW_CQPSQ_MAT_QUERY_SHIFT)
+
+/* Manage VF PBLE Backing Pages - MVPBP*/
+#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT 0
+#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_MASK \
+ (0x3ffULL << I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT 16
+#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_MASK \
+ (0x1ffULL << I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_SD_INX_SHIFT 32
+#define I40IW_CQPSQ_MVPBP_SD_INX_MASK \
+ (0xfffULL << I40IW_CQPSQ_MVPBP_SD_INX_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT 62
+#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_MASK \
+ (0x1ULL << I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT 3
+#define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \
+ (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT)
+
+/* Manage Push Page - MPP */
+#define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff
+
+#define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0
+#define I40IW_CQPSQ_MPP_QS_HANDLE_MASK (0xffffUL << \
+ I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT)
+
+#define I40IW_CQPSQ_MPP_PPIDX_SHIFT 0
+#define I40IW_CQPSQ_MPP_PPIDX_MASK (0x3ffUL << I40IW_CQPSQ_MPP_PPIDX_SHIFT)
+
+#define I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT 62
+#define I40IW_CQPSQ_MPP_FREE_PAGE_MASK (1ULL << I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT)
+
+/* Upload Context - UCTX */
+#define I40IW_CQPSQ_UCTX_QPCTXADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_UCTX_QPCTXADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_UCTX_QPID_SHIFT 0
+#define I40IW_CQPSQ_UCTX_QPID_MASK (0x3ffffUL << I40IW_CQPSQ_UCTX_QPID_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_QPTYPE_SHIFT 48
+#define I40IW_CQPSQ_UCTX_QPTYPE_MASK (0xfULL << I40IW_CQPSQ_UCTX_QPTYPE_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT 61
+#define I40IW_CQPSQ_UCTX_RAWFORMAT_MASK \
+ (1ULL << I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT 62
+#define I40IW_CQPSQ_UCTX_FREEZEQP_MASK \
+ (1ULL << I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT)
+
+/* Manage HMC PM Function Table - MHMC */
+#define I40IW_CQPSQ_MHMC_VFIDX_SHIFT 0
+#define I40IW_CQPSQ_MHMC_VFIDX_MASK (0x7fUL << I40IW_CQPSQ_MHMC_VFIDX_SHIFT)
+
+#define I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT 62
+#define I40IW_CQPSQ_MHMC_FREEPMFN_MASK \
+ (1ULL << I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT)
+
+/* Set HMC Resource Profile - SHMCRP */
+#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT 0
+#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_MASK \
+ (0x7ULL << I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT)
+#define I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT 32
+#define I40IW_CQPSQ_SHMCRP_VFNUM_MASK (0x3fULL << I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT)
+
+/* Create/Destroy CEQ */
+#define I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT 0
+#define I40IW_CQPSQ_CEQ_CEQSIZE_MASK \
+ (0x1ffffUL << I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_CEQID_SHIFT 0
+#define I40IW_CQPSQ_CEQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CEQ_CEQID_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_CEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_CEQ_VMAP_SHIFT 47
+#define I40IW_CQPSQ_CEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_CEQ_VMAP_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_MASK \
+ (0xfffffffUL << I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Create/Destroy AEQ */
+#define I40IW_CQPSQ_AEQ_AEQECNT_SHIFT 0
+#define I40IW_CQPSQ_AEQ_AEQECNT_MASK \
+ (0x7ffffUL << I40IW_CQPSQ_AEQ_AEQECNT_SHIFT)
+
+#define I40IW_CQPSQ_AEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_AEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_AEQ_VMAP_SHIFT 47
+#define I40IW_CQPSQ_AEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_AEQ_VMAP_SHIFT)
+
+#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_MASK \
+ (0xfffffffUL << I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Commit FPM Values - CFPM */
+#define I40IW_CQPSQ_CFPM_HMCFNID_SHIFT 0
+#define I40IW_CQPSQ_CFPM_HMCFNID_MASK (0x3fUL << I40IW_CQPSQ_CFPM_HMCFNID_SHIFT)
+
+/* Flush WQEs - FWQE */
+#define I40IW_CQPSQ_FWQE_AECODE_SHIFT 0
+#define I40IW_CQPSQ_FWQE_AECODE_MASK (0xffffUL << I40IW_CQPSQ_FWQE_AECODE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_AESOURCE_SHIFT 16
+#define I40IW_CQPSQ_FWQE_AESOURCE_MASK \
+ (0xfUL << I40IW_CQPSQ_FWQE_AESOURCE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_RQMNERR_SHIFT 0
+#define I40IW_CQPSQ_FWQE_RQMNERR_MASK \
+ (0xffffUL << I40IW_CQPSQ_FWQE_RQMNERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_RQMJERR_SHIFT 16
+#define I40IW_CQPSQ_FWQE_RQMJERR_MASK \
+ (0xffffUL << I40IW_CQPSQ_FWQE_RQMJERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_SQMNERR_SHIFT 32
+#define I40IW_CQPSQ_FWQE_SQMNERR_MASK \
+ (0xffffULL << I40IW_CQPSQ_FWQE_SQMNERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_SQMJERR_SHIFT 48
+#define I40IW_CQPSQ_FWQE_SQMJERR_MASK \
+ (0xffffULL << I40IW_CQPSQ_FWQE_SQMJERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_QPID_SHIFT 0
+#define I40IW_CQPSQ_FWQE_QPID_MASK (0x3ffffULL << I40IW_CQPSQ_FWQE_QPID_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT 59
+#define I40IW_CQPSQ_FWQE_GENERATE_AE_MASK (1ULL << \
+ I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT 60
+#define I40IW_CQPSQ_FWQE_USERFLCODE_MASK \
+ (1ULL << I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT 61
+#define I40IW_CQPSQ_FWQE_FLUSHSQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT 62
+#define I40IW_CQPSQ_FWQE_FLUSHRQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT)
+
+/* Manage Accelerated Port Table - MAPT */
+#define I40IW_CQPSQ_MAPT_PORT_SHIFT 0
+#define I40IW_CQPSQ_MAPT_PORT_MASK (0xffffUL << I40IW_CQPSQ_MAPT_PORT_SHIFT)
+
+#define I40IW_CQPSQ_MAPT_ADDPORT_SHIFT 62
+#define I40IW_CQPSQ_MAPT_ADDPORT_MASK (1ULL << I40IW_CQPSQ_MAPT_ADDPORT_SHIFT)
+
+/* Update Protocol Engine SDs */
+#define I40IW_CQPSQ_UPESD_SDCMD_SHIFT 0
+#define I40IW_CQPSQ_UPESD_SDCMD_MASK (0xffffffffUL << I40IW_CQPSQ_UPESD_SDCMD_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT 0
+#define I40IW_CQPSQ_UPESD_SDDATALOW_MASK \
+ (0xffffffffUL << I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT 32
+#define I40IW_CQPSQ_UPESD_SDDATAHI_MASK \
+ (0xffffffffULL << I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT)
+#define I40IW_CQPSQ_UPESD_HMCFNID_SHIFT 0
+#define I40IW_CQPSQ_UPESD_HMCFNID_MASK \
+ (0x3fUL << I40IW_CQPSQ_UPESD_HMCFNID_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT 63
+#define I40IW_CQPSQ_UPESD_ENTRY_VALID_MASK \
+ ((u64)1 << I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT 0
+#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_MASK \
+ (0xfUL << I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT 7
+#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_MASK \
+ (0x1UL << I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT)
+
+/* Suspend QP */
+#define I40IW_CQPSQ_SUSPENDQP_QPID_SHIFT 0
+#define I40IW_CQPSQ_SUSPENDQP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+/* Resume QP */
+#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT 0
+#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_MASK \
+ (0xffffffffUL << I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT)
+
+#define I40IW_CQPSQ_RESUMEQP_QPID_SHIFT 0
+#define I40IW_CQPSQ_RESUMEQP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+/* IW QP Context */
+#define I40IWQPC_DDP_VER_SHIFT 0
+#define I40IWQPC_DDP_VER_MASK (3UL << I40IWQPC_DDP_VER_SHIFT)
+
+#define I40IWQPC_SNAP_SHIFT 2
+#define I40IWQPC_SNAP_MASK (1UL << I40IWQPC_SNAP_SHIFT)
+
+#define I40IWQPC_IPV4_SHIFT 3
+#define I40IWQPC_IPV4_MASK (1UL << I40IWQPC_IPV4_SHIFT)
+
+#define I40IWQPC_NONAGLE_SHIFT 4
+#define I40IWQPC_NONAGLE_MASK (1UL << I40IWQPC_NONAGLE_SHIFT)
+
+#define I40IWQPC_INSERTVLANTAG_SHIFT 5
+#define I40IWQPC_INSERTVLANTAG_MASK (1 << I40IWQPC_INSERTVLANTAG_SHIFT)
+
+#define I40IWQPC_USESRQ_SHIFT 6
+#define I40IWQPC_USESRQ_MASK (1UL << I40IWQPC_USESRQ_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_SHIFT 7
+#define I40IWQPC_TIMESTAMP_MASK (1UL << I40IWQPC_TIMESTAMP_SHIFT)
+
+#define I40IWQPC_RQWQESIZE_SHIFT 8
+#define I40IWQPC_RQWQESIZE_MASK (3UL << I40IWQPC_RQWQESIZE_SHIFT)
+
+#define I40IWQPC_INSERTL2TAG2_SHIFT 11
+#define I40IWQPC_INSERTL2TAG2_MASK (1UL << I40IWQPC_INSERTL2TAG2_SHIFT)
+
+#define I40IWQPC_LIMIT_SHIFT 12
+#define I40IWQPC_LIMIT_MASK (3UL << I40IWQPC_LIMIT_SHIFT)
+
+#define I40IWQPC_DROPOOOSEG_SHIFT 15
+#define I40IWQPC_DROPOOOSEG_MASK (1UL << I40IWQPC_DROPOOOSEG_SHIFT)
+
+#define I40IWQPC_DUPACK_THRESH_SHIFT 16
+#define I40IWQPC_DUPACK_THRESH_MASK (7UL << I40IWQPC_DUPACK_THRESH_SHIFT)
+
+#define I40IWQPC_ERR_RQ_IDX_VALID_SHIFT 19
+#define I40IWQPC_ERR_RQ_IDX_VALID_MASK (1UL << I40IWQPC_ERR_RQ_IDX_VALID_SHIFT)
+
+#define I40IWQPC_DIS_VLAN_CHECKS_SHIFT 19
+#define I40IWQPC_DIS_VLAN_CHECKS_MASK (7UL << I40IWQPC_DIS_VLAN_CHECKS_SHIFT)
+
+#define I40IWQPC_RCVTPHEN_SHIFT 28
+#define I40IWQPC_RCVTPHEN_MASK (1UL << I40IWQPC_RCVTPHEN_SHIFT)
+
+#define I40IWQPC_XMITTPHEN_SHIFT 29
+#define I40IWQPC_XMITTPHEN_MASK (1ULL << I40IWQPC_XMITTPHEN_SHIFT)
+
+#define I40IWQPC_RQTPHEN_SHIFT 30
+#define I40IWQPC_RQTPHEN_MASK (1UL << I40IWQPC_RQTPHEN_SHIFT)
+
+#define I40IWQPC_SQTPHEN_SHIFT 31
+#define I40IWQPC_SQTPHEN_MASK (1ULL << I40IWQPC_SQTPHEN_SHIFT)
+
+#define I40IWQPC_PPIDX_SHIFT 32
+#define I40IWQPC_PPIDX_MASK (0x3ffULL << I40IWQPC_PPIDX_SHIFT)
+
+#define I40IWQPC_PMENA_SHIFT 47
+#define I40IWQPC_PMENA_MASK (1ULL << I40IWQPC_PMENA_SHIFT)
+
+#define I40IWQPC_RDMAP_VER_SHIFT 62
+#define I40IWQPC_RDMAP_VER_MASK (3ULL << I40IWQPC_RDMAP_VER_SHIFT)
+
+#define I40IWQPC_SQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_SQADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_RQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_RQADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_TTL_SHIFT 0
+#define I40IWQPC_TTL_MASK (0xffUL << I40IWQPC_TTL_SHIFT)
+
+#define I40IWQPC_RQSIZE_SHIFT 8
+#define I40IWQPC_RQSIZE_MASK (0xfUL << I40IWQPC_RQSIZE_SHIFT)
+
+#define I40IWQPC_SQSIZE_SHIFT 12
+#define I40IWQPC_SQSIZE_MASK (0xfUL << I40IWQPC_SQSIZE_SHIFT)
+
+#define I40IWQPC_SRCMACADDRIDX_SHIFT 16
+#define I40IWQPC_SRCMACADDRIDX_MASK (0x3fUL << I40IWQPC_SRCMACADDRIDX_SHIFT)
+
+#define I40IWQPC_AVOIDSTRETCHACK_SHIFT 23
+#define I40IWQPC_AVOIDSTRETCHACK_MASK (1UL << I40IWQPC_AVOIDSTRETCHACK_SHIFT)
+
+#define I40IWQPC_TOS_SHIFT 24
+#define I40IWQPC_TOS_MASK (0xffUL << I40IWQPC_TOS_SHIFT)
+
+#define I40IWQPC_SRCPORTNUM_SHIFT 32
+#define I40IWQPC_SRCPORTNUM_MASK (0xffffULL << I40IWQPC_SRCPORTNUM_SHIFT)
+
+#define I40IWQPC_DESTPORTNUM_SHIFT 48
+#define I40IWQPC_DESTPORTNUM_MASK (0xffffULL << I40IWQPC_DESTPORTNUM_SHIFT)
+
+#define I40IWQPC_DESTIPADDR0_SHIFT 32
+#define I40IWQPC_DESTIPADDR0_MASK \
+ (0xffffffffULL << I40IWQPC_DESTIPADDR0_SHIFT)
+
+#define I40IWQPC_DESTIPADDR1_SHIFT 0
+#define I40IWQPC_DESTIPADDR1_MASK \
+ (0xffffffffULL << I40IWQPC_DESTIPADDR1_SHIFT)
+
+#define I40IWQPC_DESTIPADDR2_SHIFT 32
+#define I40IWQPC_DESTIPADDR2_MASK \
+ (0xffffffffULL << I40IWQPC_DESTIPADDR2_SHIFT)
+
+#define I40IWQPC_DESTIPADDR3_SHIFT 0
+#define I40IWQPC_DESTIPADDR3_MASK \
+ (0xffffffffULL << I40IWQPC_DESTIPADDR3_SHIFT)
+
+#define I40IWQPC_SNDMSS_SHIFT 16
+#define I40IWQPC_SNDMSS_MASK (0x3fffUL << I40IWQPC_SNDMSS_SHIFT)
+
+#define I40IWQPC_VLANTAG_SHIFT 32
+#define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT)
+
+#define I40IWQPC_ARPIDX_SHIFT 48
+#define I40IWQPC_ARPIDX_MASK (0xfffULL << I40IWQPC_ARPIDX_SHIFT)
+
+#define I40IWQPC_FLOWLABEL_SHIFT 0
+#define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT)
+
+#define I40IWQPC_WSCALE_SHIFT 20
+#define I40IWQPC_WSCALE_MASK (1UL << I40IWQPC_WSCALE_SHIFT)
+
+#define I40IWQPC_KEEPALIVE_SHIFT 21
+#define I40IWQPC_KEEPALIVE_MASK (1UL << I40IWQPC_KEEPALIVE_SHIFT)
+
+#define I40IWQPC_IGNORE_TCP_OPT_SHIFT 22
+#define I40IWQPC_IGNORE_TCP_OPT_MASK (1UL << I40IWQPC_IGNORE_TCP_OPT_SHIFT)
+
+#define I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT 23
+#define I40IWQPC_IGNORE_TCP_UNS_OPT_MASK \
+ (1UL << I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT)
+
+#define I40IWQPC_TCPSTATE_SHIFT 28
+#define I40IWQPC_TCPSTATE_MASK (0xfUL << I40IWQPC_TCPSTATE_SHIFT)
+
+#define I40IWQPC_RCVSCALE_SHIFT 32
+#define I40IWQPC_RCVSCALE_MASK (0xfULL << I40IWQPC_RCVSCALE_SHIFT)
+
+#define I40IWQPC_SNDSCALE_SHIFT 40
+#define I40IWQPC_SNDSCALE_MASK (0xfULL << I40IWQPC_SNDSCALE_SHIFT)
+
+#define I40IWQPC_PDIDX_SHIFT 48
+#define I40IWQPC_PDIDX_MASK (0x7fffULL << I40IWQPC_PDIDX_SHIFT)
+
+#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT 16
+#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_MASK \
+ (0xffUL << I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT)
+
+#define I40IWQPC_KEEPALIVE_INTERVAL_SHIFT 24
+#define I40IWQPC_KEEPALIVE_INTERVAL_MASK \
+ (0xffUL << I40IWQPC_KEEPALIVE_INTERVAL_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_RECENT_SHIFT 0
+#define I40IWQPC_TIMESTAMP_RECENT_MASK \
+ (0xffffffffUL << I40IWQPC_TIMESTAMP_RECENT_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_AGE_SHIFT 32
+#define I40IWQPC_TIMESTAMP_AGE_MASK \
+ (0xffffffffULL << I40IWQPC_TIMESTAMP_AGE_SHIFT)
+
+#define I40IWQPC_SNDNXT_SHIFT 0
+#define I40IWQPC_SNDNXT_MASK (0xffffffffUL << I40IWQPC_SNDNXT_SHIFT)
+
+#define I40IWQPC_SNDWND_SHIFT 32
+#define I40IWQPC_SNDWND_MASK (0xffffffffULL << I40IWQPC_SNDWND_SHIFT)
+
+#define I40IWQPC_RCVNXT_SHIFT 0
+#define I40IWQPC_RCVNXT_MASK (0xffffffffUL << I40IWQPC_RCVNXT_SHIFT)
+
+#define I40IWQPC_RCVWND_SHIFT 32
+#define I40IWQPC_RCVWND_MASK (0xffffffffULL << I40IWQPC_RCVWND_SHIFT)
+
+#define I40IWQPC_SNDMAX_SHIFT 0
+#define I40IWQPC_SNDMAX_MASK (0xffffffffUL << I40IWQPC_SNDMAX_SHIFT)
+
+#define I40IWQPC_SNDUNA_SHIFT 32
+#define I40IWQPC_SNDUNA_MASK (0xffffffffULL << I40IWQPC_SNDUNA_SHIFT)
+
+#define I40IWQPC_SRTT_SHIFT 0
+#define I40IWQPC_SRTT_MASK (0xffffffffUL << I40IWQPC_SRTT_SHIFT)
+
+#define I40IWQPC_RTTVAR_SHIFT 32
+#define I40IWQPC_RTTVAR_MASK (0xffffffffULL << I40IWQPC_RTTVAR_SHIFT)
+
+#define I40IWQPC_SSTHRESH_SHIFT 0
+#define I40IWQPC_SSTHRESH_MASK (0xffffffffUL << I40IWQPC_SSTHRESH_SHIFT)
+
+#define I40IWQPC_CWND_SHIFT 32
+#define I40IWQPC_CWND_MASK (0xffffffffULL << I40IWQPC_CWND_SHIFT)
+
+#define I40IWQPC_SNDWL1_SHIFT 0
+#define I40IWQPC_SNDWL1_MASK (0xffffffffUL << I40IWQPC_SNDWL1_SHIFT)
+
+#define I40IWQPC_SNDWL2_SHIFT 32
+#define I40IWQPC_SNDWL2_MASK (0xffffffffULL << I40IWQPC_SNDWL2_SHIFT)
+
+#define I40IWQPC_ERR_RQ_IDX_SHIFT 32
+#define I40IWQPC_ERR_RQ_IDX_MASK (0x3fffULL << I40IWQPC_ERR_RQ_IDX_SHIFT)
+
+#define I40IWQPC_MAXSNDWND_SHIFT 0
+#define I40IWQPC_MAXSNDWND_MASK (0xffffffffUL << I40IWQPC_MAXSNDWND_SHIFT)
+
+#define I40IWQPC_REXMIT_THRESH_SHIFT 48
+#define I40IWQPC_REXMIT_THRESH_MASK (0x3fULL << I40IWQPC_REXMIT_THRESH_SHIFT)
+
+#define I40IWQPC_TXCQNUM_SHIFT 0
+#define I40IWQPC_TXCQNUM_MASK (0x1ffffUL << I40IWQPC_TXCQNUM_SHIFT)
+
+#define I40IWQPC_RXCQNUM_SHIFT 32
+#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
+
+#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_LASTBYTESENT_SHIFT 0
+#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
+
+#define I40IWQPC_SRQID_SHIFT 32
+#define I40IWQPC_SRQID_MASK (0xffULL << I40IWQPC_SRQID_SHIFT)
+
+#define I40IWQPC_ORDSIZE_SHIFT 0
+#define I40IWQPC_ORDSIZE_MASK (0x7fUL << I40IWQPC_ORDSIZE_SHIFT)
+
+#define I40IWQPC_IRDSIZE_SHIFT 16
+#define I40IWQPC_IRDSIZE_MASK (0x3UL << I40IWQPC_IRDSIZE_SHIFT)
+
+#define I40IWQPC_WRRDRSPOK_SHIFT 20
+#define I40IWQPC_WRRDRSPOK_MASK (1UL << I40IWQPC_WRRDRSPOK_SHIFT)
+
+#define I40IWQPC_RDOK_SHIFT 21
+#define I40IWQPC_RDOK_MASK (1UL << I40IWQPC_RDOK_SHIFT)
+
+#define I40IWQPC_SNDMARKERS_SHIFT 22
+#define I40IWQPC_SNDMARKERS_MASK (1UL << I40IWQPC_SNDMARKERS_SHIFT)
+
+#define I40IWQPC_BINDEN_SHIFT 23
+#define I40IWQPC_BINDEN_MASK (1UL << I40IWQPC_BINDEN_SHIFT)
+
+#define I40IWQPC_FASTREGEN_SHIFT 24
+#define I40IWQPC_FASTREGEN_MASK (1UL << I40IWQPC_FASTREGEN_SHIFT)
+
+#define I40IWQPC_PRIVEN_SHIFT 25
+#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
+
+#define I40IWQPC_LSMMPRESENT_SHIFT 26
+#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT)
+
+#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27
+#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT)
+
+#define I40IWQPC_IWARPMODE_SHIFT 28
+#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
+
+#define I40IWQPC_RCVMARKERS_SHIFT 29
+#define I40IWQPC_RCVMARKERS_MASK (1UL << I40IWQPC_RCVMARKERS_SHIFT)
+
+#define I40IWQPC_ALIGNHDRS_SHIFT 30
+#define I40IWQPC_ALIGNHDRS_MASK (1UL << I40IWQPC_ALIGNHDRS_SHIFT)
+
+#define I40IWQPC_RCVNOMPACRC_SHIFT 31
+#define I40IWQPC_RCVNOMPACRC_MASK (1UL << I40IWQPC_RCVNOMPACRC_SHIFT)
+
+#define I40IWQPC_RCVMARKOFFSET_SHIFT 33
+#define I40IWQPC_RCVMARKOFFSET_MASK (0x1ffULL << I40IWQPC_RCVMARKOFFSET_SHIFT)
+
+#define I40IWQPC_SNDMARKOFFSET_SHIFT 48
+#define I40IWQPC_SNDMARKOFFSET_MASK (0x1ffULL << I40IWQPC_SNDMARKOFFSET_SHIFT)
+
+#define I40IWQPC_QPCOMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_QPCOMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_SQTPHVAL_SHIFT 0
+#define I40IWQPC_SQTPHVAL_MASK (0xffUL << I40IWQPC_SQTPHVAL_SHIFT)
+
+#define I40IWQPC_RQTPHVAL_SHIFT 8
+#define I40IWQPC_RQTPHVAL_MASK (0xffUL << I40IWQPC_RQTPHVAL_SHIFT)
+
+#define I40IWQPC_QSHANDLE_SHIFT 16
+#define I40IWQPC_QSHANDLE_MASK (0x3ffUL << I40IWQPC_QSHANDLE_SHIFT)
+
+#define I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT 32
+#define I40IWQPC_EXCEPTION_LAN_QUEUE_MASK (0xfffULL << \
+ I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR3_SHIFT 0
+#define I40IWQPC_LOCAL_IPADDR3_MASK \
+ (0xffffffffUL << I40IWQPC_LOCAL_IPADDR3_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR2_SHIFT 32
+#define I40IWQPC_LOCAL_IPADDR2_MASK \
+ (0xffffffffULL << I40IWQPC_LOCAL_IPADDR2_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR1_SHIFT 0
+#define I40IWQPC_LOCAL_IPADDR1_MASK \
+ (0xffffffffUL << I40IWQPC_LOCAL_IPADDR1_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR0_SHIFT 32
+#define I40IWQPC_LOCAL_IPADDR0_MASK \
+ (0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT)
+
+/* wqe size considering 32 bytes per wqe*/
+#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */
+#define I40IWQP_SW_MAX_WQSIZE 16384 /* 524288 bytes */
+
+#define I40IWQP_OP_RDMA_WRITE 0
+#define I40IWQP_OP_RDMA_READ 1
+#define I40IWQP_OP_RDMA_SEND 3
+#define I40IWQP_OP_RDMA_SEND_INV 4
+#define I40IWQP_OP_RDMA_SEND_SOL_EVENT 5
+#define I40IWQP_OP_RDMA_SEND_SOL_EVENT_INV 6
+#define I40IWQP_OP_BIND_MW 8
+#define I40IWQP_OP_FAST_REGISTER 9
+#define I40IWQP_OP_LOCAL_INVALIDATE 10
+#define I40IWQP_OP_RDMA_READ_LOC_INV 11
+#define I40IWQP_OP_NOP 12
+
+#define I40IW_RSVD_SHIFT 41
+#define I40IW_RSVD_MASK (0x7fffULL << I40IW_RSVD_SHIFT)
+
+/* iwarp QP SQ WQE common fields */
+#define I40IWQPSQ_OPCODE_SHIFT 32
+#define I40IWQPSQ_OPCODE_MASK (0x3fULL << I40IWQPSQ_OPCODE_SHIFT)
+
+#define I40IWQPSQ_ADDFRAGCNT_SHIFT 38
+#define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT)
+
+#define I40IWQPSQ_PUSHWQE_SHIFT 56
+#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT)
+
+#define I40IWQPSQ_STREAMMODE_SHIFT 58
+#define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT)
+
+#define I40IWQPSQ_WAITFORRCVPDU_SHIFT 59
+#define I40IWQPSQ_WAITFORRCVPDU_MASK (1ULL << I40IWQPSQ_WAITFORRCVPDU_SHIFT)
+
+#define I40IWQPSQ_READFENCE_SHIFT 60
+#define I40IWQPSQ_READFENCE_MASK (1ULL << I40IWQPSQ_READFENCE_SHIFT)
+
+#define I40IWQPSQ_LOCALFENCE_SHIFT 61
+#define I40IWQPSQ_LOCALFENCE_MASK (1ULL << I40IWQPSQ_LOCALFENCE_SHIFT)
+
+#define I40IWQPSQ_SIGCOMPL_SHIFT 62
+#define I40IWQPSQ_SIGCOMPL_MASK (1ULL << I40IWQPSQ_SIGCOMPL_SHIFT)
+
+#define I40IWQPSQ_VALID_SHIFT 63
+#define I40IWQPSQ_VALID_MASK (1ULL << I40IWQPSQ_VALID_SHIFT)
+
+#define I40IWQPSQ_FRAG_TO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_FRAG_TO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPSQ_FRAG_LEN_SHIFT 0
+#define I40IWQPSQ_FRAG_LEN_MASK (0xffffffffUL << I40IWQPSQ_FRAG_LEN_SHIFT)
+
+#define I40IWQPSQ_FRAG_STAG_SHIFT 32
+#define I40IWQPSQ_FRAG_STAG_MASK (0xffffffffULL << I40IWQPSQ_FRAG_STAG_SHIFT)
+
+#define I40IWQPSQ_REMSTAGINV_SHIFT 0
+#define I40IWQPSQ_REMSTAGINV_MASK (0xffffffffUL << I40IWQPSQ_REMSTAGINV_SHIFT)
+
+#define I40IWQPSQ_INLINEDATAFLAG_SHIFT 57
+#define I40IWQPSQ_INLINEDATAFLAG_MASK (1ULL << I40IWQPSQ_INLINEDATAFLAG_SHIFT)
+
+#define I40IWQPSQ_INLINEDATALEN_SHIFT 48
+#define I40IWQPSQ_INLINEDATALEN_MASK \
+ (0x7fULL << I40IWQPSQ_INLINEDATALEN_SHIFT)
+
+/* iwarp send with push mode */
+#define I40IWQPSQ_WQDESCIDX_SHIFT 0
+#define I40IWQPSQ_WQDESCIDX_MASK (0x3fffUL << I40IWQPSQ_WQDESCIDX_SHIFT)
+
+/* rdma write */
+#define I40IWQPSQ_REMSTAG_SHIFT 0
+#define I40IWQPSQ_REMSTAG_MASK (0xffffffffUL << I40IWQPSQ_REMSTAG_SHIFT)
+
+#define I40IWQPSQ_REMTO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_REMTO_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* memory window */
+#define I40IWQPSQ_STAGRIGHTS_SHIFT 48
+#define I40IWQPSQ_STAGRIGHTS_MASK (0x1fULL << I40IWQPSQ_STAGRIGHTS_SHIFT)
+
+#define I40IWQPSQ_VABASEDTO_SHIFT 53
+#define I40IWQPSQ_VABASEDTO_MASK (1ULL << I40IWQPSQ_VABASEDTO_SHIFT)
+
+#define I40IWQPSQ_MWLEN_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_MWLEN_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPSQ_PARENTMRSTAG_SHIFT 0
+#define I40IWQPSQ_PARENTMRSTAG_MASK \
+ (0xffffffffUL << I40IWQPSQ_PARENTMRSTAG_SHIFT)
+
+#define I40IWQPSQ_MWSTAG_SHIFT 32
+#define I40IWQPSQ_MWSTAG_MASK (0xffffffffULL << I40IWQPSQ_MWSTAG_SHIFT)
+
+#define I40IWQPSQ_BASEVA_TO_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_BASEVA_TO_FBO_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Local Invalidate */
+#define I40IWQPSQ_LOCSTAG_SHIFT 32
+#define I40IWQPSQ_LOCSTAG_MASK (0xffffffffULL << I40IWQPSQ_LOCSTAG_SHIFT)
+
+/* Fast Register */
+#define I40IWQPSQ_STAGKEY_SHIFT 0
+#define I40IWQPSQ_STAGKEY_MASK (0xffUL << I40IWQPSQ_STAGKEY_SHIFT)
+
+#define I40IWQPSQ_STAGINDEX_SHIFT 8
+#define I40IWQPSQ_STAGINDEX_MASK (0xffffffUL << I40IWQPSQ_STAGINDEX_SHIFT)
+
+#define I40IWQPSQ_COPYHOSTPBLS_SHIFT 43
+#define I40IWQPSQ_COPYHOSTPBLS_MASK (1ULL << I40IWQPSQ_COPYHOSTPBLS_SHIFT)
+
+#define I40IWQPSQ_LPBLSIZE_SHIFT 44
+#define I40IWQPSQ_LPBLSIZE_MASK (3ULL << I40IWQPSQ_LPBLSIZE_SHIFT)
+
+#define I40IWQPSQ_HPAGESIZE_SHIFT 46
+#define I40IWQPSQ_HPAGESIZE_MASK (3ULL << I40IWQPSQ_HPAGESIZE_SHIFT)
+
+#define I40IWQPSQ_STAGLEN_SHIFT 0
+#define I40IWQPSQ_STAGLEN_MASK (0x1ffffffffffULL << I40IWQPSQ_STAGLEN_SHIFT)
+
+#define I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT 48
+#define I40IWQPSQ_FIRSTPMPBLIDXLO_MASK \
+ (0xffffULL << I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT)
+
+#define I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT 0
+#define I40IWQPSQ_FIRSTPMPBLIDXHI_MASK \
+ (0xfffUL << I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT)
+
+#define I40IWQPSQ_PBLADDR_SHIFT 12
+#define I40IWQPSQ_PBLADDR_MASK (0xfffffffffffffULL << I40IWQPSQ_PBLADDR_SHIFT)
+
+/* iwarp QP RQ WQE common fields */
+#define I40IWQPRQ_ADDFRAGCNT_SHIFT I40IWQPSQ_ADDFRAGCNT_SHIFT
+#define I40IWQPRQ_ADDFRAGCNT_MASK I40IWQPSQ_ADDFRAGCNT_MASK
+
+#define I40IWQPRQ_VALID_SHIFT I40IWQPSQ_VALID_SHIFT
+#define I40IWQPRQ_VALID_MASK I40IWQPSQ_VALID_MASK
+
+#define I40IWQPRQ_COMPLCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPRQ_COMPLCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPRQ_FRAG_LEN_SHIFT I40IWQPSQ_FRAG_LEN_SHIFT
+#define I40IWQPRQ_FRAG_LEN_MASK I40IWQPSQ_FRAG_LEN_MASK
+
+#define I40IWQPRQ_STAG_SHIFT I40IWQPSQ_FRAG_STAG_SHIFT
+#define I40IWQPRQ_STAG_MASK I40IWQPSQ_FRAG_STAG_MASK
+
+#define I40IWQPRQ_TO_SHIFT I40IWQPSQ_FRAG_TO_SHIFT
+#define I40IWQPRQ_TO_MASK I40IWQPSQ_FRAG_TO_MASK
+
+/* Query FPM CQP buf */
+#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_QPS_MASK \
+ (0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CQS_MASK \
+ (0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
+
+#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT 0
+#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_MASK \
+ (0x3fffUL << I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT 32
+#define I40IW_QUERY_FPM_MAX_PE_SDS_MASK \
+ (0x3fffULL << I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_QPS_MASK \
+ (0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CQS_MASK \
+ (0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CEQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CEQS_MASK \
+ (0xffUL << I40IW_QUERY_FPM_MAX_CEQS_SHIFT)
+
+#define I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT 32
+#define I40IW_QUERY_FPM_XFBLOCKSIZE_MASK \
+ (0xffffffffULL << I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT)
+
+#define I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT 32
+#define I40IW_QUERY_FPM_Q1BLOCKSIZE_MASK \
+ (0xffffffffULL << I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT)
+
+#define I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT 16
+#define I40IW_QUERY_FPM_HTMULTIPLIER_MASK \
+ (0xfUL << I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT)
+
+#define I40IW_QUERY_FPM_TIMERBUCKET_SHIFT 32
+#define I40IW_QUERY_FPM_TIMERBUCKET_MASK \
+ (0xffFFULL << I40IW_QUERY_FPM_TIMERBUCKET_SHIFT)
+
+/* Static HMC pages allocated buf */
+#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT 0
+#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_MASK \
+ (0x3fUL << I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT)
+
+#define I40IW_HW_PAGE_SIZE 4096
+#define I40IW_DONE_COUNT 1000
+#define I40IW_SLEEP_COUNT 10
+
+enum {
+ I40IW_QUEUES_ALIGNMENT_MASK = (128 - 1),
+ I40IW_AEQ_ALIGNMENT_MASK = (256 - 1),
+ I40IW_Q2_ALIGNMENT_MASK = (256 - 1),
+ I40IW_CEQ_ALIGNMENT_MASK = (256 - 1),
+ I40IW_CQ0_ALIGNMENT_MASK = (256 - 1),
+ I40IW_HOST_CTX_ALIGNMENT_MASK = (4 - 1),
+ I40IW_SHADOWAREA_MASK = (128 - 1),
+ I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = 0,
+ I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = 0
+};
+
+enum i40iw_alignment {
+ I40IW_CQP_ALIGNMENT = 0x200,
+ I40IW_AEQ_ALIGNMENT = 0x100,
+ I40IW_CEQ_ALIGNMENT = 0x100,
+ I40IW_CQ0_ALIGNMENT = 0x100,
+ I40IW_SD_BUF_ALIGNMENT = 0x100
+};
+
+#define I40IW_QP_WQE_MIN_SIZE 32
+#define I40IW_QP_WQE_MAX_SIZE 128
+
+#define I40IW_CQE_QTYPE_RQ 0
+#define I40IW_CQE_QTYPE_SQ 1
+
+#define I40IW_RING_INIT(_ring, _size) \
+ { \
+ (_ring).head = 0; \
+ (_ring).tail = 0; \
+ (_ring).size = (_size); \
+ }
+#define I40IW_RING_GETSIZE(_ring) ((_ring).size)
+#define I40IW_RING_GETCURRENT_HEAD(_ring) ((_ring).head)
+#define I40IW_RING_GETCURRENT_TAIL(_ring) ((_ring).tail)
+
+#define I40IW_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if (!I40IW_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = I40IW_ERR_RING_FULL; \
+ } \
+ }
+
+#define I40IW_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if ((I40IW_RING_WORK_AVAILABLE(_ring) + (_count)) < size) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = I40IW_ERR_RING_FULL; \
+ } \
+ }
+
+#define I40IW_RING_MOVE_TAIL(_ring) \
+ (_ring).tail = ((_ring).tail + 1) % (_ring).size
+
+#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
+ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
+#define I40IW_RING_SET_TAIL(_ring, _pos) \
+ (_ring).tail = (_pos) % (_ring).size
+
+#define I40IW_RING_FULL_ERR(_ring) \
+ ( \
+ (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 1)) \
+ )
+
+#define I40IW_ERR_RING_FULL2(_ring) \
+ ( \
+ (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 2)) \
+ )
+
+#define I40IW_ERR_RING_FULL3(_ring) \
+ ( \
+ (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 3)) \
+ )
+
+#define I40IW_RING_MORE_WORK(_ring) \
+ ( \
+ (I40IW_RING_WORK_AVAILABLE(_ring) != 0) \
+ )
+
+#define I40IW_RING_WORK_AVAILABLE(_ring) \
+ ( \
+ (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
+ )
+
+#define I40IW_RING_GET_WQES_AVAILABLE(_ring) \
+ ( \
+ ((_ring).size - I40IW_RING_WORK_AVAILABLE(_ring) - 1) \
+ )
+
+#define I40IW_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
+ { \
+ index = I40IW_RING_GETCURRENT_HEAD(_ring); \
+ I40IW_RING_MOVE_HEAD(_ring, _retcode); \
+ }
+
+/* Async Events codes */
+#define I40IW_AE_AMP_UNALLOCATED_STAG 0x0102
+#define I40IW_AE_AMP_INVALID_STAG 0x0103
+#define I40IW_AE_AMP_BAD_QP 0x0104
+#define I40IW_AE_AMP_BAD_PD 0x0105
+#define I40IW_AE_AMP_BAD_STAG_KEY 0x0106
+#define I40IW_AE_AMP_BAD_STAG_INDEX 0x0107
+#define I40IW_AE_AMP_BOUNDS_VIOLATION 0x0108
+#define I40IW_AE_AMP_RIGHTS_VIOLATION 0x0109
+#define I40IW_AE_AMP_TO_WRAP 0x010a
+#define I40IW_AE_AMP_FASTREG_SHARED 0x010b
+#define I40IW_AE_AMP_FASTREG_VALID_STAG 0x010c
+#define I40IW_AE_AMP_FASTREG_MW_STAG 0x010d
+#define I40IW_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
+#define I40IW_AE_AMP_FASTREG_PBL_TABLE_OVERFLOW 0x010f
+#define I40IW_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
+#define I40IW_AE_AMP_INVALIDATE_SHARED 0x0111
+#define I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
+#define I40IW_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
+#define I40IW_AE_AMP_MWBIND_VALID_STAG 0x0114
+#define I40IW_AE_AMP_MWBIND_OF_MR_STAG 0x0115
+#define I40IW_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
+#define I40IW_AE_AMP_MWBIND_TO_MW_STAG 0x0117
+#define I40IW_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
+#define I40IW_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
+#define I40IW_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
+#define I40IW_AE_AMP_MWBIND_BIND_DISABLED 0x011b
+#define I40IW_AE_AMP_WQE_INVALID_PARAMETER 0x0130
+#define I40IW_AE_BAD_CLOSE 0x0201
+#define I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
+#define I40IW_AE_CQ_OPERATION_ERROR 0x0203
+#define I40IW_AE_PRIV_OPERATION_DENIED 0x011c
+#define I40IW_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
+#define I40IW_AE_STAG_ZERO_INVALID 0x0206
+#define I40IW_AE_IB_RREQ_AND_Q1_FULL 0x0207
+#define I40IW_AE_SRQ_LIMIT 0x0209
+#define I40IW_AE_WQE_UNEXPECTED_OPCODE 0x020a
+#define I40IW_AE_WQE_INVALID_PARAMETER 0x020b
+#define I40IW_AE_WQE_LSMM_TOO_LONG 0x0220
+#define I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
+#define I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID 0x0302
+#define I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
+#define I40IW_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
+#define I40IW_AE_DDP_UBE_INVALID_MO 0x0305
+#define I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
+#define I40IW_AE_DDP_UBE_INVALID_QN 0x0307
+#define I40IW_AE_DDP_NO_L_BIT 0x0308
+#define I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
+#define I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
+#define I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
+#define I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
+#define I40IW_AE_INVALID_ARP_ENTRY 0x0401
+#define I40IW_AE_INVALID_TCP_OPTION_RCVD 0x0402
+#define I40IW_AE_STALE_ARP_ENTRY 0x0403
+#define I40IW_AE_INVALID_WQE_LENGTH 0x0404
+#define I40IW_AE_INVALID_MAC_ENTRY 0x0405
+#define I40IW_AE_LLP_CLOSE_COMPLETE 0x0501
+#define I40IW_AE_LLP_CONNECTION_RESET 0x0502
+#define I40IW_AE_LLP_FIN_RECEIVED 0x0503
+#define I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
+#define I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
+#define I40IW_AE_LLP_SEGMENT_TOO_LARGE 0x0506
+#define I40IW_AE_LLP_SEGMENT_TOO_SMALL 0x0507
+#define I40IW_AE_LLP_SYN_RECEIVED 0x0508
+#define I40IW_AE_LLP_TERMINATE_RECEIVED 0x0509
+#define I40IW_AE_LLP_TOO_MANY_RETRIES 0x050a
+#define I40IW_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
+#define I40IW_AE_LLP_DOUBT_REACHABILITY 0x050c
+#define I40IW_AE_LLP_RX_VLAN_MISMATCH 0x050d
+#define I40IW_AE_RESOURCE_EXHAUSTION 0x0520
+#define I40IW_AE_RESET_SENT 0x0601
+#define I40IW_AE_TERMINATE_SENT 0x0602
+#define I40IW_AE_RESET_NOT_SENT 0x0603
+#define I40IW_AE_LCE_QP_CATASTROPHIC 0x0700
+#define I40IW_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
+#define I40IW_AE_LCE_CQ_CATASTROPHIC 0x0702
+#define I40IW_AE_UDA_XMIT_FRAG_SEQ 0x0800
+#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0801
+#define I40IW_AE_UDA_XMIT_IPADDR_MISMATCH 0x0802
+#define I40IW_AE_QP_SUSPEND_COMPLETE 0x0900
+
+#define OP_DELETE_LOCAL_MAC_IPADDR_ENTRY 1
+#define OP_CEQ_DESTROY 2
+#define OP_AEQ_DESTROY 3
+#define OP_DELETE_ARP_CACHE_ENTRY 4
+#define OP_MANAGE_APBVT_ENTRY 5
+#define OP_CEQ_CREATE 6
+#define OP_AEQ_CREATE 7
+#define OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY 8
+#define OP_ADD_LOCAL_MAC_IPADDR_ENTRY 9
+#define OP_MANAGE_QHASH_TABLE_ENTRY 10
+#define OP_QP_MODIFY 11
+#define OP_QP_UPLOAD_CONTEXT 12
+#define OP_CQ_CREATE 13
+#define OP_CQ_DESTROY 14
+#define OP_QP_CREATE 15
+#define OP_QP_DESTROY 16
+#define OP_ALLOC_STAG 17
+#define OP_MR_REG_NON_SHARED 18
+#define OP_DEALLOC_STAG 19
+#define OP_MW_ALLOC 20
+#define OP_QP_FLUSH_WQES 21
+#define OP_ADD_ARP_CACHE_ENTRY 22
+#define OP_MANAGE_PUSH_PAGE 23
+#define OP_UPDATE_PE_SDS 24
+#define OP_MANAGE_HMC_PM_FUNC_TABLE 25
+#define OP_SUSPEND 26
+#define OP_RESUME 27
+#define OP_MANAGE_VF_PBLE_BP 28
+#define OP_QUERY_FPM_VALUES 29
+#define OP_COMMIT_FPM_VALUES 30
+#define OP_SIZE_CQP_STAT_ARRAY 31
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
new file mode 100644
index 000000000000..5484cbf55f0f
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
@@ -0,0 +1,821 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * i40iw_find_sd_index_limit - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @index: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by i40iw_hmc_rsrc_type.
+ */
+
+static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info,
+ u32 type,
+ u32 idx,
+ u32 cnt,
+ u32 *sd_idx,
+ u32 *sd_limit)
+{
+ u64 fpm_addr, fpm_limit;
+
+ fpm_addr = hmc_info->hmc_obj[(type)].base +
+ hmc_info->hmc_obj[type].size * idx;
+ fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
+ *sd_idx = (u32)(fpm_addr / I40IW_HMC_DIRECT_BP_SIZE);
+ *sd_limit = (u32)((fpm_limit - 1) / I40IW_HMC_DIRECT_BP_SIZE);
+ *sd_limit += 1;
+}
+
+/**
+ * i40iw_find_pd_index_limit - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_index: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by i40iw_hmc_rsrc_type.
+ */
+
+static inline void i40iw_find_pd_index_limit(struct i40iw_hmc_info *hmc_info,
+ u32 type,
+ u32 idx,
+ u32 cnt,
+ u32 *pd_idx,
+ u32 *pd_limit)
+{
+ u64 fpm_adr, fpm_limit;
+
+ fpm_adr = hmc_info->hmc_obj[type].base +
+ hmc_info->hmc_obj[type].size * idx;
+ fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);
+ *(pd_idx) = (u32)(fpm_adr / I40IW_HMC_PAGED_BP_SIZE);
+ *(pd_limit) = (u32)((fpm_limit - 1) / I40IW_HMC_PAGED_BP_SIZE);
+ *(pd_limit) += 1;
+}
+
+/**
+ * i40iw_set_sd_entry - setup entry for sd programming
+ * @pa: physical addr
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static inline void i40iw_set_sd_entry(u64 pa,
+ u32 idx,
+ enum i40iw_sd_entry_type type,
+ struct update_sd_entry *entry)
+{
+ entry->data = pa | (I40IW_HMC_MAX_BP_COUNT << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
+ (((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
+ I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |
+ (1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);
+ entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
+}
+
+/**
+ * i40iw_clr_sd_entry - setup entry for sd clear
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static inline void i40iw_clr_sd_entry(u32 idx, enum i40iw_sd_entry_type type,
+ struct update_sd_entry *entry)
+{
+ entry->data = (I40IW_HMC_MAX_BP_COUNT <<
+ I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
+ (((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
+ I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);
+ entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
+}
+
+/**
+ * i40iw_hmc_sd_one - setup 1 sd entry for cqp
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ * @pa: physical addr
+ * @sd_idx: sd index
+ * @type: paged or direct sd
+ * @setsd: flag to set or clear sd
+ */
+enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev,
+ u8 hmc_fn_id,
+ u64 pa, u32 sd_idx,
+ enum i40iw_sd_entry_type type,
+ bool setsd)
+{
+ struct i40iw_update_sds_info sdinfo;
+
+ sdinfo.cnt = 1;
+ sdinfo.hmc_fn_id = hmc_fn_id;
+ if (setsd)
+ i40iw_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
+ else
+ i40iw_clr_sd_entry(sd_idx, type, sdinfo.entry);
+
+ return dev->cqp->process_cqp_sds(dev, &sdinfo);
+}
+
+/**
+ * i40iw_hmc_sd_grp - setup group od sd entries for cqp
+ * @dev: pointer to the device structure
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: sd index
+ * @sd_cnt: number of sd entries
+ * @setsd: flag to set or clear sd
+ */
+static enum i40iw_status_code i40iw_hmc_sd_grp(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_info *hmc_info,
+ u32 sd_index,
+ u32 sd_cnt,
+ bool setsd)
+{
+ struct i40iw_hmc_sd_entry *sd_entry;
+ struct i40iw_update_sds_info sdinfo;
+ u64 pa;
+ u32 i;
+ enum i40iw_status_code ret_code = 0;
+
+ memset(&sdinfo, 0, sizeof(sdinfo));
+ sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
+ for (i = sd_index; i < sd_index + sd_cnt; i++) {
+ sd_entry = &hmc_info->sd_table.sd_entry[i];
+ if (!sd_entry ||
+ (!sd_entry->valid && setsd) ||
+ (sd_entry->valid && !setsd))
+ continue;
+ if (setsd) {
+ pa = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa :
+ sd_entry->u.bp.addr.pa;
+ i40iw_set_sd_entry(pa, i, sd_entry->entry_type,
+ &sdinfo.entry[sdinfo.cnt]);
+ } else {
+ i40iw_clr_sd_entry(i, sd_entry->entry_type,
+ &sdinfo.entry[sdinfo.cnt]);
+ }
+ sdinfo.cnt++;
+ if (sdinfo.cnt == I40IW_MAX_SD_ENTRIES) {
+ ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+ if (ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "i40iw_hmc_sd_grp: sd_programming failed err=%d\n",
+ ret_code);
+ return ret_code;
+ }
+ sdinfo.cnt = 0;
+ }
+ }
+ if (sdinfo.cnt)
+ ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+
+ return ret_code;
+}
+
+/**
+ * i40iw_vfdev_from_fpm - return vf dev ptr for hmc function id
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ */
+struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
+{
+ struct i40iw_vfdev *vf_dev = NULL;
+ u16 idx;
+
+ for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
+ if (dev->vf_dev[idx] &&
+ ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
+ vf_dev = dev->vf_dev[idx];
+ break;
+ }
+ }
+ return vf_dev;
+}
+
+/**
+ * i40iw_vf_hmcinfo_from_fpm - get ptr to hmc for func_id
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ */
+struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
+ u8 hmc_fn_id)
+{
+ struct i40iw_hmc_info *hmc_info = NULL;
+ u16 idx;
+
+ for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
+ if (dev->vf_dev[idx] &&
+ ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
+ hmc_info = &dev->vf_dev[idx]->hmc_info;
+ break;
+ }
+ }
+ return hmc_info;
+}
+
+/**
+ * i40iw_hmc_finish_add_sd_reg - program sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: create obj info
+ */
+static enum i40iw_status_code i40iw_hmc_finish_add_sd_reg(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_create_obj_info *info)
+{
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+
+ if (!info->add_sd_cnt)
+ return 0;
+
+ return i40iw_hmc_sd_grp(dev, info->hmc_info,
+ info->hmc_info->sd_indexes[0],
+ info->add_sd_cnt, true);
+}
+
+/**
+ * i40iw_create_iw_hmc_obj - allocate backing store for hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to i40iw_hmc_iw_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ */
+enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_create_obj_info *info)
+{
+ struct i40iw_hmc_sd_entry *sd_entry;
+ u32 sd_idx, sd_lmt;
+ u32 pd_idx = 0, pd_lmt = 0;
+ u32 pd_idx1 = 0, pd_lmt1 = 0;
+ u32 i, j;
+ bool pd_error = false;
+ enum i40iw_status_code ret_code = 0;
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s: error type %u, start = %u, req cnt %u, cnt = %u\n",
+ __func__, info->rsrc_type, info->start_idx, info->count,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+ }
+
+ if (!dev->is_pf)
+ return i40iw_vchnl_vf_add_hmc_objs(dev, info->rsrc_type, 0, info->count);
+
+ i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count,
+ &sd_idx, &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ return I40IW_ERR_INVALID_SD_INDEX;
+ }
+ i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx, &pd_lmt);
+
+ for (j = sd_idx; j < sd_lmt; j++) {
+ ret_code = i40iw_add_sd_table_entry(dev->hw, info->hmc_info,
+ j,
+ info->entry_type,
+ I40IW_HMC_DIRECT_BP_SIZE);
+ if (ret_code)
+ goto exit_sd_error;
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+
+ if ((sd_entry->entry_type == I40IW_SD_TYPE_PAGED) &&
+ ((dev->hmc_info == info->hmc_info) &&
+ (info->rsrc_type != I40IW_HMC_IW_PBLE))) {
+ pd_idx1 = max(pd_idx, (j * I40IW_HMC_MAX_BP_COUNT));
+ pd_lmt1 = min(pd_lmt,
+ (j + 1) * I40IW_HMC_MAX_BP_COUNT);
+ for (i = pd_idx1; i < pd_lmt1; i++) {
+ /* update the pd table entry */
+ ret_code = i40iw_add_pd_table_entry(dev->hw, info->hmc_info,
+ i, NULL);
+ if (ret_code) {
+ pd_error = true;
+ break;
+ }
+ }
+ if (pd_error) {
+ while (i && (i > pd_idx1)) {
+ i40iw_remove_pd_bp(dev->hw, info->hmc_info, (i - 1),
+ info->is_pf);
+ i--;
+ }
+ }
+ }
+ if (sd_entry->valid)
+ continue;
+
+ info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
+ info->add_sd_cnt++;
+ sd_entry->valid = true;
+ }
+ return i40iw_hmc_finish_add_sd_reg(dev, info);
+
+exit_sd_error:
+ while (j && (j > sd_idx)) {
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+ switch (sd_entry->entry_type) {
+ case I40IW_SD_TYPE_PAGED:
+ pd_idx1 = max(pd_idx,
+ (j - 1) * I40IW_HMC_MAX_BP_COUNT);
+ pd_lmt1 = min(pd_lmt, (j * I40IW_HMC_MAX_BP_COUNT));
+ for (i = pd_idx1; i < pd_lmt1; i++)
+ i40iw_prep_remove_pd_page(info->hmc_info, i);
+ break;
+ case I40IW_SD_TYPE_DIRECT:
+ i40iw_prep_remove_pd_page(info->hmc_info, (j - 1));
+ break;
+ default:
+ ret_code = I40IW_ERR_INVALID_SD_TYPE;
+ break;
+ }
+ j--;
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_finish_del_sd_reg - delete sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: dele obj info
+ * @reset: true if called before reset
+ */
+static enum i40iw_status_code i40iw_finish_del_sd_reg(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_del_obj_info *info,
+ bool reset)
+{
+ struct i40iw_hmc_sd_entry *sd_entry;
+ enum i40iw_status_code ret_code = 0;
+ u32 i, sd_idx;
+ struct i40iw_dma_mem *mem;
+
+ if (dev->is_pf && !reset)
+ ret_code = i40iw_hmc_sd_grp(dev, info->hmc_info,
+ info->hmc_info->sd_indexes[0],
+ info->del_sd_cnt, false);
+
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd sd_grp\n", __func__);
+
+ for (i = 0; i < info->del_sd_cnt; i++) {
+ sd_idx = info->hmc_info->sd_indexes[i];
+ sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
+ if (!sd_entry)
+ continue;
+ mem = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
+ &sd_entry->u.pd_table.pd_page_addr :
+ &sd_entry->u.bp.addr;
+
+ if (!mem || !mem->va)
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd mem\n", __func__);
+ else
+ i40iw_free_dma_mem(dev->hw, mem);
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_del_iw_hmc_obj - remove pe hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to i40iw_hmc_del_obj_info struct
+ * @reset: true if called before reset
+ *
+ * This will de-populate the SDs and PDs. It frees
+ * the memory for PDS and backing storage. After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ */
+enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_del_obj_info *info,
+ bool reset)
+{
+ struct i40iw_hmc_pd_table *pd_table;
+ u32 sd_idx, sd_lmt;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+ u32 i, j;
+ enum i40iw_status_code ret_code = 0;
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
+ __func__, info->start_idx, info->rsrc_type,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+ }
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC,
+ "%s: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n",
+ __func__, info->start_idx, info->count,
+ info->rsrc_type,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+ }
+ if (!dev->is_pf) {
+ ret_code = i40iw_vchnl_vf_del_hmc_obj(dev, info->rsrc_type, 0,
+ info->count);
+ if (info->rsrc_type != I40IW_HMC_IW_PBLE)
+ return ret_code;
+ }
+
+ i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx, &pd_lmt);
+
+ for (j = pd_idx; j < pd_lmt; j++) {
+ sd_idx = j / I40IW_HMC_PD_CNT_IN_SD;
+
+ if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+ I40IW_SD_TYPE_PAGED)
+ continue;
+
+ rel_pd_idx = j % I40IW_HMC_PD_CNT_IN_SD;
+ pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ if (pd_table->pd_entry[rel_pd_idx].valid) {
+ ret_code = i40iw_remove_pd_bp(dev->hw, info->hmc_info, j,
+ info->is_pf);
+ if (ret_code) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error\n", __func__);
+ return ret_code;
+ }
+ }
+ }
+
+ i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &sd_idx, &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error invalid sd_idx\n", __func__);
+ return I40IW_ERR_INVALID_SD_INDEX;
+ }
+
+ for (i = sd_idx; i < sd_lmt; i++) {
+ if (!info->hmc_info->sd_table.sd_entry[i].valid)
+ continue;
+ switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+ case I40IW_SD_TYPE_DIRECT:
+ ret_code = i40iw_prep_remove_sd_bp(info->hmc_info, i);
+ if (!ret_code) {
+ info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+ info->del_sd_cnt++;
+ }
+ break;
+ case I40IW_SD_TYPE_PAGED:
+ ret_code = i40iw_prep_remove_pd_page(info->hmc_info, i);
+ if (!ret_code) {
+ info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+ info->del_sd_cnt++;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return i40iw_finish_del_sd_reg(dev, info, reset);
+}
+
+/**
+ * i40iw_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ */
+enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
+ struct i40iw_hmc_info *hmc_info,
+ u32 sd_index,
+ enum i40iw_sd_entry_type type,
+ u64 direct_mode_sz)
+{
+ enum i40iw_status_code ret_code = 0;
+ struct i40iw_hmc_sd_entry *sd_entry;
+ bool dma_mem_alloc_done = false;
+ struct i40iw_dma_mem mem;
+ u64 alloc_len;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+ if (!sd_entry->valid) {
+ if (type == I40IW_SD_TYPE_PAGED)
+ alloc_len = I40IW_HMC_PAGED_BP_SIZE;
+ else
+ alloc_len = direct_mode_sz;
+
+ /* allocate a 4K pd page or 2M backing page */
+ ret_code = i40iw_allocate_dma_mem(hw, &mem, alloc_len,
+ I40IW_HMC_PD_BP_BUF_ALIGNMENT);
+ if (ret_code)
+ goto exit;
+ dma_mem_alloc_done = true;
+ if (type == I40IW_SD_TYPE_PAGED) {
+ ret_code = i40iw_allocate_virt_mem(hw,
+ &sd_entry->u.pd_table.pd_entry_virt_mem,
+ sizeof(struct i40iw_hmc_pd_entry) * 512);
+ if (ret_code)
+ goto exit;
+ sd_entry->u.pd_table.pd_entry = (struct i40iw_hmc_pd_entry *)
+ sd_entry->u.pd_table.pd_entry_virt_mem.va;
+
+ memcpy(&sd_entry->u.pd_table.pd_page_addr, &mem, sizeof(struct i40iw_dma_mem));
+ } else {
+ memcpy(&sd_entry->u.bp.addr, &mem, sizeof(struct i40iw_dma_mem));
+ sd_entry->u.bp.sd_pd_index = sd_index;
+ }
+
+ hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+
+ I40IW_INC_SD_REFCNT(&hmc_info->sd_table);
+ }
+ if (sd_entry->entry_type == I40IW_SD_TYPE_DIRECT)
+ I40IW_INC_BP_REFCNT(&sd_entry->u.bp);
+exit:
+ if (ret_code)
+ if (dma_mem_alloc_done)
+ i40iw_free_dma_mem(hw, &mem);
+
+ return ret_code;
+}
+
+/**
+ * i40iw_add_pd_table_entry - Adds page descriptor to the specified table
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
+ *
+ * This function:
+ * 1. Initializes the pd entry
+ * 2. Adds pd_entry in the pd_table
+ * 3. Mark the entry valid in i40iw_hmc_pd_entry structure
+ * 4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ * 1. The memory for pd should be pinned down, physically contiguous and
+ * aligned on 4K boundary and zeroed memory.
+ * 2. It should be 4K in size.
+ */
+enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
+ struct i40iw_hmc_info *hmc_info,
+ u32 pd_index,
+ struct i40iw_dma_mem *rsrc_pg)
+{
+ enum i40iw_status_code ret_code = 0;
+ struct i40iw_hmc_pd_table *pd_table;
+ struct i40iw_hmc_pd_entry *pd_entry;
+ struct i40iw_dma_mem mem;
+ struct i40iw_dma_mem *page = &mem;
+ u32 sd_idx, rel_pd_idx;
+ u64 *pd_addr;
+ u64 page_desc;
+
+ if (pd_index / I40IW_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
+ return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+
+ sd_idx = (pd_index / I40IW_HMC_PD_CNT_IN_SD);
+ if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != I40IW_SD_TYPE_PAGED)
+ return 0;
+
+ rel_pd_idx = (pd_index % I40IW_HMC_PD_CNT_IN_SD);
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ if (!pd_entry->valid) {
+ if (rsrc_pg) {
+ pd_entry->rsrc_pg = true;
+ page = rsrc_pg;
+ } else {
+ ret_code = i40iw_allocate_dma_mem(hw, page,
+ I40IW_HMC_PAGED_BP_SIZE,
+ I40IW_HMC_PD_BP_BUF_ALIGNMENT);
+ if (ret_code)
+ return ret_code;
+ pd_entry->rsrc_pg = false;
+ }
+
+ memcpy(&pd_entry->bp.addr, page, sizeof(struct i40iw_dma_mem));
+ pd_entry->bp.sd_pd_index = pd_index;
+ pd_entry->bp.entry_type = I40IW_SD_TYPE_PAGED;
+ page_desc = page->pa | 0x1;
+
+ pd_addr = (u64 *)pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+
+ memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
+
+ pd_entry->sd_index = sd_idx;
+ pd_entry->valid = true;
+ I40IW_INC_PD_REFCNT(pd_table);
+ if (hmc_info->hmc_fn_id < I40IW_FIRST_VF_FPM_ID)
+ I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, rel_pd_idx);
+ else if (hw->hmc.hmc_fn_id != hmc_info->hmc_fn_id)
+ I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, rel_pd_idx,
+ hmc_info->hmc_fn_id);
+ }
+ I40IW_INC_BP_REFCNT(&pd_entry->bp);
+
+ return 0;
+}
+
+/**
+ * i40iw_remove_pd_bp - remove a backing page from a page descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: distinguishes a VF from a PF
+ *
+ * This function:
+ * 1. Marks the entry in pd table (for paged address mode) or in sd table
+ * (for direct address mode) invalid.
+ * 2. Write to register PMPDINV to invalidate the backing page in FV cache
+ * 3. Decrement the ref count for the pd _entry
+ * assumptions:
+ * 1. Caller can deallocate the memory used by backing storage after this
+ * function returns.
+ */
+enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
+ struct i40iw_hmc_info *hmc_info,
+ u32 idx,
+ bool is_pf)
+{
+ struct i40iw_hmc_pd_entry *pd_entry;
+ struct i40iw_hmc_pd_table *pd_table;
+ struct i40iw_hmc_sd_entry *sd_entry;
+ u32 sd_idx, rel_pd_idx;
+ struct i40iw_dma_mem *mem;
+ u64 *pd_addr;
+
+ sd_idx = idx / I40IW_HMC_PD_CNT_IN_SD;
+ rel_pd_idx = idx % I40IW_HMC_PD_CNT_IN_SD;
+ if (sd_idx >= hmc_info->sd_table.sd_cnt)
+ return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+ if (sd_entry->entry_type != I40IW_SD_TYPE_PAGED)
+ return I40IW_ERR_INVALID_SD_TYPE;
+
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ I40IW_DEC_BP_REFCNT(&pd_entry->bp);
+ if (pd_entry->bp.ref_cnt)
+ return 0;
+
+ pd_entry->valid = false;
+ I40IW_DEC_PD_REFCNT(pd_table);
+ pd_addr = (u64 *)pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+ memset(pd_addr, 0, sizeof(u64));
+ if (is_pf)
+ I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
+ else
+ I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx,
+ hmc_info->hmc_fn_id);
+
+ if (!pd_entry->rsrc_pg) {
+ mem = &pd_entry->bp.addr;
+ if (!mem || !mem->va)
+ return I40IW_ERR_PARAM;
+ i40iw_free_dma_mem(hw, mem);
+ }
+ if (!pd_table->ref_cnt)
+ i40iw_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
+
+ return 0;
+}
+
+/**
+ * i40iw_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ */
+enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx)
+{
+ struct i40iw_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+ I40IW_DEC_BP_REFCNT(&sd_entry->u.bp);
+ if (sd_entry->u.bp.ref_cnt)
+ return I40IW_ERR_NOT_READY;
+
+ I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
+ sd_entry->valid = false;
+
+ return 0;
+}
+
+/**
+ * i40iw_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ */
+enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info,
+ u32 idx)
+{
+ struct i40iw_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+ if (sd_entry->u.pd_table.ref_cnt)
+ return I40IW_ERR_NOT_READY;
+
+ sd_entry->valid = false;
+ I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
+
+ return 0;
+}
+
+/**
+ * i40iw_pf_init_vfhmc -
+ * @vf_cnt_array: array of cnt values of iwarp hmc objects
+ * @vf_hmc_fn_id: hmc function id ofr vf driver
+ * @dev: pointer to i40iw_dev struct
+ *
+ * Called by pf driver to initialize hmc_info for vf driver instance.
+ */
+enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev,
+ u8 vf_hmc_fn_id,
+ u32 *vf_cnt_array)
+{
+ struct i40iw_hmc_info *hmc_info;
+ enum i40iw_status_code ret_code = 0;
+ u32 i;
+
+ if ((vf_hmc_fn_id < I40IW_FIRST_VF_FPM_ID) ||
+ (vf_hmc_fn_id >= I40IW_FIRST_VF_FPM_ID +
+ I40IW_MAX_PE_ENABLED_VF_COUNT)) {
+ i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: invalid vf_hmc_fn_id 0x%x\n",
+ __func__, vf_hmc_fn_id);
+ return I40IW_ERR_INVALID_HMCFN_ID;
+ }
+
+ ret_code = i40iw_sc_init_iw_hmc(dev, vf_hmc_fn_id);
+ if (ret_code)
+ return ret_code;
+
+ hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, vf_hmc_fn_id);
+
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
+ if (vf_cnt_array)
+ hmc_info->hmc_obj[i].cnt =
+ vf_cnt_array[i - I40IW_HMC_IW_QP];
+ else
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.h b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
new file mode 100644
index 000000000000..4c3fdd875621
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
@@ -0,0 +1,241 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_HMC_H
+#define I40IW_HMC_H
+
+#include "i40iw_d.h"
+
+struct i40iw_hw;
+enum i40iw_status_code;
+
+#define I40IW_HMC_MAX_BP_COUNT 512
+#define I40IW_MAX_SD_ENTRIES 11
+#define I40IW_HW_DBG_HMC_INVALID_BP_MARK 0xCA
+
+#define I40IW_HMC_INFO_SIGNATURE 0x484D5347
+#define I40IW_HMC_PD_CNT_IN_SD 512
+#define I40IW_HMC_DIRECT_BP_SIZE 0x200000
+#define I40IW_HMC_MAX_SD_COUNT 4096
+#define I40IW_HMC_PAGED_BP_SIZE 4096
+#define I40IW_HMC_PD_BP_BUF_ALIGNMENT 4096
+#define I40IW_FIRST_VF_FPM_ID 16
+#define FPM_MULTIPLIER 1024
+
+#define I40IW_INC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt++)
+#define I40IW_INC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt++)
+#define I40IW_INC_BP_REFCNT(bp) ((bp)->ref_cnt++)
+
+#define I40IW_DEC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt--)
+#define I40IW_DEC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt--)
+#define I40IW_DEC_BP_REFCNT(bp) ((bp)->ref_cnt--)
+
+/**
+ * I40IW_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ */
+#define I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx) \
+ i40iw_wr32((hw), I40E_PFHMC_PDINV, \
+ (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
+ (0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT) | \
+ ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+/**
+ * I40IW_INVALIDATE_VF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ * @hmc_fn_id: VF's function id
+ */
+#define I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id) \
+ i40iw_wr32(hw, I40E_GLHMC_VFPDINV(hmc_fn_id - I40IW_FIRST_VF_FPM_ID), \
+ ((sd_idx << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
+ (pd_idx << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+struct i40iw_hmc_obj_info {
+ u64 base;
+ u32 max_cnt;
+ u32 cnt;
+ u64 size;
+};
+
+enum i40iw_sd_entry_type {
+ I40IW_SD_TYPE_INVALID = 0,
+ I40IW_SD_TYPE_PAGED = 1,
+ I40IW_SD_TYPE_DIRECT = 2
+};
+
+struct i40iw_hmc_bp {
+ enum i40iw_sd_entry_type entry_type;
+ struct i40iw_dma_mem addr;
+ u32 sd_pd_index;
+ u32 ref_cnt;
+};
+
+struct i40iw_hmc_pd_entry {
+ struct i40iw_hmc_bp bp;
+ u32 sd_index;
+ bool rsrc_pg;
+ bool valid;
+};
+
+struct i40iw_hmc_pd_table {
+ struct i40iw_dma_mem pd_page_addr;
+ struct i40iw_hmc_pd_entry *pd_entry;
+ struct i40iw_virt_mem pd_entry_virt_mem;
+ u32 ref_cnt;
+ u32 sd_index;
+};
+
+struct i40iw_hmc_sd_entry {
+ enum i40iw_sd_entry_type entry_type;
+ bool valid;
+
+ union {
+ struct i40iw_hmc_pd_table pd_table;
+ struct i40iw_hmc_bp bp;
+ } u;
+};
+
+struct i40iw_hmc_sd_table {
+ struct i40iw_virt_mem addr;
+ u32 sd_cnt;
+ u32 ref_cnt;
+ struct i40iw_hmc_sd_entry *sd_entry;
+};
+
+struct i40iw_hmc_info {
+ u32 signature;
+ u8 hmc_fn_id;
+ u16 first_sd_index;
+
+ struct i40iw_hmc_obj_info *hmc_obj;
+ struct i40iw_virt_mem hmc_obj_virt_mem;
+ struct i40iw_hmc_sd_table sd_table;
+ u16 sd_indexes[I40IW_HMC_MAX_SD_COUNT];
+};
+
+struct update_sd_entry {
+ u64 cmd;
+ u64 data;
+};
+
+struct i40iw_update_sds_info {
+ u32 cnt;
+ u8 hmc_fn_id;
+ struct update_sd_entry entry[I40IW_MAX_SD_ENTRIES];
+};
+
+struct i40iw_ccq_cqe_info;
+struct i40iw_hmc_fcn_info {
+ void (*callback_fcn)(struct i40iw_sc_dev *, void *,
+ struct i40iw_ccq_cqe_info *);
+ void *cqp_callback_param;
+ u32 vf_id;
+ u16 iw_vf_idx;
+ bool free_fcn;
+};
+
+enum i40iw_hmc_rsrc_type {
+ I40IW_HMC_IW_QP = 0,
+ I40IW_HMC_IW_CQ = 1,
+ I40IW_HMC_IW_SRQ = 2,
+ I40IW_HMC_IW_HTE = 3,
+ I40IW_HMC_IW_ARP = 4,
+ I40IW_HMC_IW_APBVT_ENTRY = 5,
+ I40IW_HMC_IW_MR = 6,
+ I40IW_HMC_IW_XF = 7,
+ I40IW_HMC_IW_XFFL = 8,
+ I40IW_HMC_IW_Q1 = 9,
+ I40IW_HMC_IW_Q1FL = 10,
+ I40IW_HMC_IW_TIMER = 11,
+ I40IW_HMC_IW_FSIMC = 12,
+ I40IW_HMC_IW_FSIAV = 13,
+ I40IW_HMC_IW_PBLE = 14,
+ I40IW_HMC_IW_MAX = 15,
+};
+
+struct i40iw_hmc_create_obj_info {
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_virt_mem add_sd_virt_mem;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ u32 add_sd_cnt;
+ enum i40iw_sd_entry_type entry_type;
+ bool is_pf;
+};
+
+struct i40iw_hmc_del_obj_info {
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_virt_mem del_sd_virt_mem;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ u32 del_sd_cnt;
+ bool is_pf;
+};
+
+enum i40iw_status_code i40iw_copy_dma_mem(struct i40iw_hw *hw, void *dest_buf,
+ struct i40iw_dma_mem *src_mem, u64 src_offset, u64 size);
+enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_create_obj_info *info);
+enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_del_obj_info *info,
+ bool reset);
+enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev, u8 hmc_fn_id,
+ u64 pa, u32 sd_idx, enum i40iw_sd_entry_type type,
+ bool setsd);
+enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
+ struct i40iw_update_sds_info *info);
+struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id);
+struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
+ u8 hmc_fn_id);
+enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
+ struct i40iw_hmc_info *hmc_info, u32 sd_index,
+ enum i40iw_sd_entry_type type, u64 direct_mode_sz);
+enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
+ struct i40iw_hmc_info *hmc_info, u32 pd_index,
+ struct i40iw_dma_mem *rsrc_pg);
+enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
+ struct i40iw_hmc_info *hmc_info, u32 idx, bool is_pf);
+enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx);
+enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info, u32 idx);
+
+#define ENTER_SHARED_FUNCTION()
+#define EXIT_SHARED_FUNCTION()
+
+#endif /* I40IW_HMC_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
new file mode 100644
index 000000000000..9fd302425563
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -0,0 +1,730 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+
+#include "i40iw.h"
+
+/**
+ * i40iw_initialize_hw_resources - initialize hw resource during open
+ * @iwdev: iwarp device
+ */
+u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
+{
+ unsigned long num_pds;
+ u32 resources_size;
+ u32 max_mr;
+ u32 max_qp;
+ u32 max_cq;
+ u32 arp_table_size;
+ u32 mrdrvbits;
+ void *resource_ptr;
+
+ max_qp = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt;
+ max_cq = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+ max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
+ arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
+ iwdev->max_cqe = 0xFFFFF;
+ num_pds = max_qp * 4;
+ resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
+ resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
+ resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
+ resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
+ resources_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
+ resources_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
+ resources_size += sizeof(struct i40iw_qp **) * max_qp;
+ iwdev->mem_resources = kzalloc(resources_size, GFP_KERNEL);
+
+ if (!iwdev->mem_resources)
+ return -ENOMEM;
+
+ iwdev->max_qp = max_qp;
+ iwdev->max_mr = max_mr;
+ iwdev->max_cq = max_cq;
+ iwdev->max_pd = num_pds;
+ iwdev->arp_table_size = arp_table_size;
+ iwdev->arp_table = (struct i40iw_arp_entry *)iwdev->mem_resources;
+ resource_ptr = iwdev->mem_resources + (sizeof(struct i40iw_arp_entry) * arp_table_size);
+
+ iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+ iwdev->allocated_qps = resource_ptr;
+ iwdev->allocated_cqs = &iwdev->allocated_qps[BITS_TO_LONGS(max_qp)];
+ iwdev->allocated_mrs = &iwdev->allocated_cqs[BITS_TO_LONGS(max_cq)];
+ iwdev->allocated_pds = &iwdev->allocated_mrs[BITS_TO_LONGS(max_mr)];
+ iwdev->allocated_arps = &iwdev->allocated_pds[BITS_TO_LONGS(num_pds)];
+ iwdev->qp_table = (struct i40iw_qp **)(&iwdev->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
+ set_bit(0, iwdev->allocated_mrs);
+ set_bit(0, iwdev->allocated_qps);
+ set_bit(0, iwdev->allocated_cqs);
+ set_bit(0, iwdev->allocated_pds);
+ set_bit(0, iwdev->allocated_arps);
+
+ /* Following for ILQ/IEQ */
+ set_bit(1, iwdev->allocated_qps);
+ set_bit(1, iwdev->allocated_cqs);
+ set_bit(1, iwdev->allocated_pds);
+ set_bit(2, iwdev->allocated_cqs);
+ set_bit(2, iwdev->allocated_pds);
+
+ spin_lock_init(&iwdev->resource_lock);
+ mrdrvbits = 24 - get_count_order(iwdev->max_mr);
+ iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
+ return 0;
+}
+
+/**
+ * i40iw_cqp_ce_handler - handle cqp completions
+ * @iwdev: iwarp device
+ * @arm: flag to arm after completions
+ * @cq: cq for cqp completions
+ */
+static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq, bool arm)
+{
+ struct i40iw_cqp_request *cqp_request;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ u32 cqe_count = 0;
+ struct i40iw_ccq_cqe_info info;
+ int ret;
+
+ do {
+ memset(&info, 0, sizeof(info));
+ ret = dev->ccq_ops->ccq_get_cqe_info(cq, &info);
+ if (ret)
+ break;
+ cqp_request = (struct i40iw_cqp_request *)(unsigned long)info.scratch;
+ if (info.error)
+ i40iw_pr_err("opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
+ info.op_code, info.maj_err_code, info.min_err_code);
+ if (cqp_request) {
+ cqp_request->compl_info.maj_err_code = info.maj_err_code;
+ cqp_request->compl_info.min_err_code = info.min_err_code;
+ cqp_request->compl_info.op_ret_val = info.op_ret_val;
+ cqp_request->compl_info.error = info.error;
+
+ if (cqp_request->waiting) {
+ cqp_request->request_done = true;
+ wake_up(&cqp_request->waitq);
+ i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+ } else {
+ if (cqp_request->callback_fcn)
+ cqp_request->callback_fcn(cqp_request, 1);
+ i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+ }
+ }
+
+ cqe_count++;
+ } while (1);
+
+ if (arm && cqe_count) {
+ i40iw_process_bh(dev);
+ dev->ccq_ops->ccq_arm(cq);
+ }
+}
+
+/**
+ * i40iw_iwarp_ce_handler - handle iwarp completions
+ * @iwdev: iwarp device
+ * @iwcp: iwarp cq receiving event
+ */
+static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev,
+ struct i40iw_sc_cq *iwcq)
+{
+ struct i40iw_cq *i40iwcq = iwcq->back_cq;
+
+ if (i40iwcq->ibcq.comp_handler)
+ i40iwcq->ibcq.comp_handler(&i40iwcq->ibcq,
+ i40iwcq->ibcq.cq_context);
+}
+
+/**
+ * i40iw_puda_ce_handler - handle puda completion events
+ * @iwdev: iwarp device
+ * @cq: puda completion q for event
+ */
+static void i40iw_puda_ce_handler(struct i40iw_device *iwdev,
+ struct i40iw_sc_cq *cq)
+{
+ struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)&iwdev->sc_dev;
+ enum i40iw_status_code status;
+ u32 compl_error;
+
+ do {
+ status = i40iw_puda_poll_completion(dev, cq, &compl_error);
+ if (status == I40IW_ERR_QUEUE_EMPTY)
+ break;
+ if (status) {
+ i40iw_pr_err("puda status = %d\n", status);
+ break;
+ }
+ if (compl_error) {
+ i40iw_pr_err("puda compl_err =0x%x\n", compl_error);
+ break;
+ }
+ } while (1);
+
+ dev->ccq_ops->ccq_arm(cq);
+}
+
+/**
+ * i40iw_process_ceq - handle ceq for completions
+ * @iwdev: iwarp device
+ * @ceq: ceq having cq for completion
+ */
+void i40iw_process_ceq(struct i40iw_device *iwdev, struct i40iw_ceq *ceq)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_sc_ceq *sc_ceq;
+ struct i40iw_sc_cq *cq;
+ bool arm = true;
+
+ sc_ceq = &ceq->sc_ceq;
+ do {
+ cq = dev->ceq_ops->process_ceq(dev, sc_ceq);
+ if (!cq)
+ break;
+
+ if (cq->cq_type == I40IW_CQ_TYPE_CQP)
+ i40iw_cqp_ce_handler(iwdev, cq, arm);
+ else if (cq->cq_type == I40IW_CQ_TYPE_IWARP)
+ i40iw_iwarp_ce_handler(iwdev, cq);
+ else if ((cq->cq_type == I40IW_CQ_TYPE_ILQ) ||
+ (cq->cq_type == I40IW_CQ_TYPE_IEQ))
+ i40iw_puda_ce_handler(iwdev, cq);
+ } while (1);
+}
+
+/**
+ * i40iw_next_iw_state - modify qp state
+ * @iwqp: iwarp qp to modify
+ * @state: next state for qp
+ * @del_hash: del hash
+ * @term: term message
+ * @termlen: length of term message
+ */
+void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+ u8 state,
+ u8 del_hash,
+ u8 term,
+ u8 termlen)
+{
+ struct i40iw_modify_qp_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.next_iwarp_state = state;
+ info.remove_hash_idx = del_hash;
+ info.cq_num_valid = true;
+ info.arp_cache_idx_valid = true;
+ info.dont_send_term = true;
+ info.dont_send_fin = true;
+ info.termlen = termlen;
+
+ if (term & I40IWQP_TERM_SEND_TERM_ONLY)
+ info.dont_send_term = false;
+ if (term & I40IWQP_TERM_SEND_FIN_ONLY)
+ info.dont_send_fin = false;
+ if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
+ info.reset_tcp_conn = true;
+ i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
+}
+
+/**
+ * i40iw_process_aeq - handle aeq events
+ * @iwdev: iwarp device
+ */
+void i40iw_process_aeq(struct i40iw_device *iwdev)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_aeq *aeq = &iwdev->aeq;
+ struct i40iw_sc_aeq *sc_aeq = &aeq->sc_aeq;
+ struct i40iw_aeqe_info aeinfo;
+ struct i40iw_aeqe_info *info = &aeinfo;
+ int ret;
+ struct i40iw_qp *iwqp = NULL;
+ struct i40iw_sc_cq *cq = NULL;
+ struct i40iw_cq *iwcq = NULL;
+ struct i40iw_sc_qp *qp = NULL;
+ struct i40iw_qp_host_ctx_info *ctx_info = NULL;
+ unsigned long flags;
+
+ u32 aeqcnt = 0;
+
+ if (!sc_aeq->size)
+ return;
+
+ do {
+ memset(info, 0, sizeof(*info));
+ ret = dev->aeq_ops->get_next_aeqe(sc_aeq, info);
+ if (ret)
+ break;
+
+ aeqcnt++;
+ i40iw_debug(dev, I40IW_DEBUG_AEQ,
+ "%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
+ __func__, info->ae_id, info->qp, info->qp_cq_id);
+ if (info->qp) {
+ iwqp = iwdev->qp_table[info->qp_cq_id];
+ if (!iwqp) {
+ i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
+ continue;
+ }
+ qp = &iwqp->sc_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = info->tcp_state;
+ iwqp->hw_iwarp_state = info->iwarp_state;
+ iwqp->last_aeq = info->ae_id;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ ctx_info = &iwqp->ctx_info;
+ ctx_info->err_rq_idx_valid = true;
+ } else {
+ if (info->ae_id != I40IW_AE_CQ_OPERATION_ERROR)
+ continue;
+ }
+
+ switch (info->ae_id) {
+ case I40IW_AE_LLP_FIN_RECEIVED:
+ if (qp->term_flags)
+ continue;
+ if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+ iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
+ if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
+ (iwqp->ibqp_state == IB_QPS_RTS)) {
+ i40iw_next_iw_state(iwqp,
+ I40IW_QP_STATE_CLOSING, 0, 0, 0);
+ i40iw_cm_disconn(iwqp);
+ }
+ iwqp->cm_id->add_ref(iwqp->cm_id);
+ i40iw_schedule_cm_timer(iwqp->cm_node,
+ (struct i40iw_puda_buf *)iwqp,
+ I40IW_TIMER_TYPE_CLOSE, 1, 0);
+ }
+ break;
+ case I40IW_AE_LLP_CLOSE_COMPLETE:
+ if (qp->term_flags)
+ i40iw_terminate_done(qp, 0);
+ else
+ i40iw_cm_disconn(iwqp);
+ break;
+ case I40IW_AE_RESET_SENT:
+ i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0);
+ i40iw_cm_disconn(iwqp);
+ break;
+ case I40IW_AE_LLP_CONNECTION_RESET:
+ if (atomic_read(&iwqp->close_timer_started))
+ continue;
+ i40iw_cm_disconn(iwqp);
+ break;
+ case I40IW_AE_TERMINATE_SENT:
+ i40iw_terminate_send_fin(qp);
+ break;
+ case I40IW_AE_LLP_TERMINATE_RECEIVED:
+ i40iw_terminate_received(qp, info);
+ break;
+ case I40IW_AE_CQ_OPERATION_ERROR:
+ i40iw_pr_err("Processing an iWARP related AE for CQ misc = 0x%04X\n",
+ info->ae_id);
+ cq = (struct i40iw_sc_cq *)(unsigned long)info->compl_ctx;
+ iwcq = (struct i40iw_cq *)cq->back_cq;
+
+ if (iwcq->ibcq.event_handler) {
+ struct ib_event ibevent;
+
+ ibevent.device = iwcq->ibcq.device;
+ ibevent.event = IB_EVENT_CQ_ERR;
+ ibevent.element.cq = &iwcq->ibcq;
+ iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
+ }
+ break;
+ case I40IW_AE_PRIV_OPERATION_DENIED:
+ case I40IW_AE_STAG_ZERO_INVALID:
+ case I40IW_AE_IB_RREQ_AND_Q1_FULL:
+ case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
+ case I40IW_AE_DDP_UBE_INVALID_MO:
+ case I40IW_AE_DDP_UBE_INVALID_QN:
+ case I40IW_AE_DDP_NO_L_BIT:
+ case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST:
+ case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ case I40IW_AE_INVALID_ARP_ENTRY:
+ case I40IW_AE_INVALID_TCP_OPTION_RCVD:
+ case I40IW_AE_STALE_ARP_ENTRY:
+ case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
+ case I40IW_AE_LLP_SYN_RECEIVED:
+ case I40IW_AE_LLP_TOO_MANY_RETRIES:
+ case I40IW_AE_LLP_DOUBT_REACHABILITY:
+ case I40IW_AE_LCE_QP_CATASTROPHIC:
+ case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
+ case I40IW_AE_LCE_CQ_CATASTROPHIC:
+ case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
+ case I40IW_AE_QP_SUSPEND_COMPLETE:
+ ctx_info->err_rq_idx_valid = false;
+ default:
+ if (!info->sq && ctx_info->err_rq_idx_valid) {
+ ctx_info->err_rq_idx = info->wqe_idx;
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+ ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ iwqp->host_ctx.va,
+ ctx_info);
+ }
+ i40iw_terminate_connection(qp, info);
+ break;
+ }
+ } while (1);
+
+ if (aeqcnt)
+ dev->aeq_ops->repost_aeq_entries(dev, aeqcnt);
+}
+
+/**
+ * i40iw_manage_apbvt - add or delete tcp port
+ * @iwdev: iwarp device
+ * @accel_local_port: port for apbvt
+ * @add_port: add or delete port
+ */
+int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
+{
+ struct i40iw_apbvt_info *info;
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.manage_apbvt_entry.info;
+
+ memset(info, 0, sizeof(*info));
+ info->add = add_port;
+ info->port = cpu_to_le16(accel_local_port);
+
+ cqp_info->cqp_cmd = OP_MANAGE_APBVT_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Manage APBVT entry fail");
+ return status;
+}
+
+/**
+ * i40iw_manage_arp_cache - manage hw arp cache
+ * @iwdev: iwarp device
+ * @mac_addr: mac address ptr
+ * @ip_addr: ip addr for arp cache
+ * @action: add, delete or modify
+ */
+void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
+ unsigned char *mac_addr,
+ __be32 *ip_addr,
+ bool ipv4,
+ u32 action)
+{
+ struct i40iw_add_arp_cache_entry_info *info;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ int arp_index;
+
+ arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
+ if (arp_index == -1)
+ return;
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ if (action == I40IW_ARP_ADD) {
+ cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
+ info = &cqp_info->in.u.add_arp_cache_entry.info;
+ memset(info, 0, sizeof(*info));
+ info->arp_index = cpu_to_le32(arp_index);
+ info->permanent = true;
+ ether_addr_copy(info->mac_addr, mac_addr);
+ cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+ } else {
+ cqp_info->cqp_cmd = OP_DELETE_ARP_CACHE_ENTRY;
+ cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.del_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
+ }
+
+ cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->post_sq = 1;
+ if (i40iw_handle_cqp_op(iwdev, cqp_request))
+ i40iw_pr_err("CQP-OP Add/Del Arp Cache entry fail");
+}
+
+/**
+ * i40iw_send_syn_cqp_callback - do syn/ack after qhash
+ * @cqp_request: qhash cqp completion
+ * @send_ack: flag send ack
+ */
+static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u32 send_ack)
+{
+ i40iw_send_syn(cqp_request->param, send_ack);
+}
+
+/**
+ * i40iw_manage_qhash - add or modify qhash
+ * @iwdev: iwarp device
+ * @cminfo: cm info for qhash
+ * @etype: type (syn or quad)
+ * @mtype: type of qhash
+ * @cmnode: cmnode associated with connection
+ * @wait: wait for completion
+ * @user_pri:user pri of the connection
+ */
+enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
+ struct i40iw_cm_info *cminfo,
+ enum i40iw_quad_entry_type etype,
+ enum i40iw_quad_hash_manage_type mtype,
+ void *cmnode,
+ bool wait)
+{
+ struct i40iw_qhash_table_info *info;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ enum i40iw_status_code status;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.manage_qhash_table_entry.info;
+ memset(info, 0, sizeof(*info));
+
+ info->manage = mtype;
+ info->entry_type = etype;
+ if (cminfo->vlan_id != 0xFFFF) {
+ info->vlan_valid = true;
+ info->vlan_id = cpu_to_le16(cminfo->vlan_id);
+ } else {
+ info->vlan_valid = false;
+ }
+
+ info->ipv4_valid = cminfo->ipv4;
+ ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
+ info->qp_num = cpu_to_le32(dev->ilq->qp_id);
+ info->dest_port = cpu_to_le16(cminfo->loc_port);
+ info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
+ info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
+ info->dest_ip[2] = cpu_to_le32(cminfo->loc_addr[2]);
+ info->dest_ip[3] = cpu_to_le32(cminfo->loc_addr[3]);
+ if (etype == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
+ info->src_port = cpu_to_le16(cminfo->rem_port);
+ info->src_ip[0] = cpu_to_le32(cminfo->rem_addr[0]);
+ info->src_ip[1] = cpu_to_le32(cminfo->rem_addr[1]);
+ info->src_ip[2] = cpu_to_le32(cminfo->rem_addr[2]);
+ info->src_ip[3] = cpu_to_le32(cminfo->rem_addr[3]);
+ }
+ if (cmnode) {
+ cqp_request->callback_fcn = i40iw_send_syn_cqp_callback;
+ cqp_request->param = (void *)cmnode;
+ }
+
+ if (info->ipv4_valid)
+ i40iw_debug(dev, I40IW_DEBUG_CM,
+ "%s:%s IP=%pI4, port=%d, mac=%pM, vlan_id=%d\n",
+ __func__, (!mtype) ? "DELETE" : "ADD",
+ info->dest_ip,
+ info->dest_port, info->mac_addr, cminfo->vlan_id);
+ else
+ i40iw_debug(dev, I40IW_DEBUG_CM,
+ "%s:%s IP=%pI6, port=%d, mac=%pM, vlan_id=%d\n",
+ __func__, (!mtype) ? "DELETE" : "ADD",
+ info->dest_ip,
+ info->dest_port, info->mac_addr, cminfo->vlan_id);
+ cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->cqp_cmd = OP_MANAGE_QHASH_TABLE_ENTRY;
+ cqp_info->post_sq = 1;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Manage Qhash Entry fail");
+ return status;
+}
+
+/**
+ * i40iw_hw_flush_wqes - flush qp's wqe
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ * @info: info for flush
+ * @wait: flag wait for completion
+ */
+enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
+ struct i40iw_sc_qp *qp,
+ struct i40iw_qp_flush_info *info,
+ bool wait)
+{
+ enum i40iw_status_code status;
+ struct i40iw_qp_flush_info *hw_info;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
+ memcpy(hw_info, info, sizeof(*hw_info));
+
+ cqp_info->cqp_cmd = OP_QP_FLUSH_WQES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_flush_wqes.qp = qp;
+ cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Flush WQE's fail");
+ return status;
+}
+
+/**
+ * i40iw_hw_manage_vf_pble_bp - manage vf pbles
+ * @iwdev: iwarp device
+ * @info: info for managing pble
+ * @wait: flag wait for completion
+ */
+enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
+ struct i40iw_manage_vf_pble_info *info,
+ bool wait)
+{
+ enum i40iw_status_code status;
+ struct i40iw_manage_vf_pble_info *hw_info;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ if ((iwdev->init_state < CCQ_CREATED) && wait)
+ wait = false;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ hw_info = &cqp_request->info.in.u.manage_vf_pble_bp.info;
+ memcpy(hw_info, info, sizeof(*hw_info));
+
+ cqp_info->cqp_cmd = OP_MANAGE_VF_PBLE_BP;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_vf_pble_bp.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.manage_vf_pble_bp.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Manage VF pble_bp fail");
+ return status;
+}
+
+/**
+ * i40iw_get_ib_wc - return change flush code to IB's
+ * @opcode: iwarp flush code
+ */
+static enum ib_wc_status i40iw_get_ib_wc(enum i40iw_flush_opcode opcode)
+{
+ switch (opcode) {
+ case FLUSH_PROT_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case FLUSH_REM_ACCESS_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case FLUSH_LOC_QP_OP_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case FLUSH_REM_OP_ERR:
+ return IB_WC_REM_OP_ERR;
+ case FLUSH_LOC_LEN_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case FLUSH_GENERAL_ERR:
+ return IB_WC_GENERAL_ERR;
+ case FLUSH_FATAL_ERR:
+ default:
+ return IB_WC_FATAL_ERR;
+ }
+}
+
+/**
+ * i40iw_set_flush_info - set flush info
+ * @pinfo: set flush info
+ * @min: minor err
+ * @maj: major err
+ * @opcode: flush error code
+ */
+static void i40iw_set_flush_info(struct i40iw_qp_flush_info *pinfo,
+ u16 *min,
+ u16 *maj,
+ enum i40iw_flush_opcode opcode)
+{
+ *min = (u16)i40iw_get_ib_wc(opcode);
+ *maj = CQE_MAJOR_DRV;
+ pinfo->userflushcode = true;
+}
+
+/**
+ * i40iw_flush_wqes - flush wqe for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp to flush wqes
+ */
+void i40iw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp *iwqp)
+{
+ struct i40iw_qp_flush_info info;
+ struct i40iw_qp_flush_info *pinfo = &info;
+
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+ memset(pinfo, 0, sizeof(*pinfo));
+ info.sq = true;
+ info.rq = true;
+ if (qp->term_flags) {
+ i40iw_set_flush_info(pinfo, &pinfo->sq_minor_code,
+ &pinfo->sq_major_code, qp->flush_code);
+ i40iw_set_flush_info(pinfo, &pinfo->rq_minor_code,
+ &pinfo->rq_major_code, qp->flush_code);
+ }
+ (void)i40iw_hw_flush_wqes(iwdev, &iwqp->sc_qp, &info, true);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
new file mode 100644
index 000000000000..90e5af21737e
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -0,0 +1,1910 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/addrconf.h>
+
+#include "i40iw.h"
+#include "i40iw_register.h"
+#include <net/netevent.h>
+#define CLIENT_IW_INTERFACE_VERSION_MAJOR 0
+#define CLIENT_IW_INTERFACE_VERSION_MINOR 01
+#define CLIENT_IW_INTERFACE_VERSION_BUILD 00
+
+#define DRV_VERSION_MAJOR 0
+#define DRV_VERSION_MINOR 5
+#define DRV_VERSION_BUILD 123
+#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
+ __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
+
+static int push_mode;
+module_param(push_mode, int, 0644);
+MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)");
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
+
+static int resource_profile;
+module_param(resource_profile, int, 0644);
+MODULE_PARM_DESC(resource_profile,
+ "Resource Profile: 0=no VF RDMA support (default), 1=Weighted VF, 2=Even Distribution");
+
+static int max_rdma_vfs = 32;
+module_param(max_rdma_vfs, int, 0644);
+MODULE_PARM_DESC(max_rdma_vfs, "Maximum VF count: 0-32 32=default");
+static int mpa_version = 2;
+module_param(mpa_version, int, 0644);
+MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2");
+
+MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static struct i40e_client i40iw_client;
+static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw";
+
+static LIST_HEAD(i40iw_handlers);
+static spinlock_t i40iw_handler_lock;
+
+static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
+ u32 vf_id, u8 *msg, u16 len);
+
+static struct notifier_block i40iw_inetaddr_notifier = {
+ .notifier_call = i40iw_inetaddr_event
+};
+
+static struct notifier_block i40iw_inetaddr6_notifier = {
+ .notifier_call = i40iw_inet6addr_event
+};
+
+static struct notifier_block i40iw_net_notifier = {
+ .notifier_call = i40iw_net_event
+};
+
+static int i40iw_notifiers_registered;
+
+/**
+ * i40iw_find_i40e_handler - find a handler given a client info
+ * @ldev: pointer to a client info
+ */
+static struct i40iw_handler *i40iw_find_i40e_handler(struct i40e_info *ldev)
+{
+ struct i40iw_handler *hdl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i40iw_handler_lock, flags);
+ list_for_each_entry(hdl, &i40iw_handlers, list) {
+ if (hdl->ldev.netdev == ldev->netdev) {
+ spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+ return hdl;
+ }
+ }
+ spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+ return NULL;
+}
+
+/**
+ * i40iw_find_netdev - find a handler given a netdev
+ * @netdev: pointer to net_device
+ */
+struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev)
+{
+ struct i40iw_handler *hdl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i40iw_handler_lock, flags);
+ list_for_each_entry(hdl, &i40iw_handlers, list) {
+ if (hdl->ldev.netdev == netdev) {
+ spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+ return hdl;
+ }
+ }
+ spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+ return NULL;
+}
+
+/**
+ * i40iw_add_handler - add a handler to the list
+ * @hdl: handler to be added to the handler list
+ */
+static void i40iw_add_handler(struct i40iw_handler *hdl)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i40iw_handler_lock, flags);
+ list_add(&hdl->list, &i40iw_handlers);
+ spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+}
+
+/**
+ * i40iw_del_handler - delete a handler from the list
+ * @hdl: handler to be deleted from the handler list
+ */
+static int i40iw_del_handler(struct i40iw_handler *hdl)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i40iw_handler_lock, flags);
+ list_del(&hdl->list);
+ spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+ return 0;
+}
+
+/**
+ * i40iw_enable_intr - set up device interrupts
+ * @dev: hardware control device structure
+ * @msix_id: id of the interrupt to be enabled
+ */
+static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
+{
+ u32 val;
+
+ val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+ (3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+ if (dev->is_pf)
+ i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_id - 1), val);
+ else
+ i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_id - 1), val);
+}
+
+/**
+ * i40iw_dpc - tasklet for aeq and ceq 0
+ * @data: iwarp device
+ */
+static void i40iw_dpc(unsigned long data)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)data;
+
+ if (iwdev->msix_shared)
+ i40iw_process_ceq(iwdev, iwdev->ceqlist);
+ i40iw_process_aeq(iwdev);
+ i40iw_enable_intr(&iwdev->sc_dev, iwdev->iw_msixtbl[0].idx);
+}
+
+/**
+ * i40iw_ceq_dpc - dpc handler for CEQ
+ * @data: data points to CEQ
+ */
+static void i40iw_ceq_dpc(unsigned long data)
+{
+ struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+ struct i40iw_device *iwdev = iwceq->iwdev;
+
+ i40iw_process_ceq(iwdev, iwceq);
+ i40iw_enable_intr(&iwdev->sc_dev, iwceq->msix_idx);
+}
+
+/**
+ * i40iw_irq_handler - interrupt handler for aeq and ceq0
+ * @irq: Interrupt request number
+ * @data: iwarp device
+ */
+static irqreturn_t i40iw_irq_handler(int irq, void *data)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)data;
+
+ tasklet_schedule(&iwdev->dpc_tasklet);
+ return IRQ_HANDLED;
+}
+
+/**
+ * i40iw_destroy_cqp - destroy control qp
+ * @iwdev: iwarp device
+ * @create_done: 1 if cqp create poll was success
+ *
+ * Issue destroy cqp request and
+ * free the resources associated with the cqp
+ */
+static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
+{
+ enum i40iw_status_code status = 0;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_cqp *cqp = &iwdev->cqp;
+
+ if (free_hwcqp && dev->cqp_ops->cqp_destroy)
+ status = dev->cqp_ops->cqp_destroy(dev->cqp);
+ if (status)
+ i40iw_pr_err("destroy cqp failed");
+
+ i40iw_free_dma_mem(dev->hw, &cqp->sq);
+ kfree(cqp->scratch_array);
+ iwdev->cqp.scratch_array = NULL;
+
+ kfree(cqp->cqp_requests);
+ cqp->cqp_requests = NULL;
+}
+
+/**
+ * i40iw_disable_irqs - disable device interrupts
+ * @dev: hardware control device structure
+ * @msic_vec: msix vector to disable irq
+ * @dev_id: parameter to pass to free_irq (used during irq setup)
+ *
+ * The function is called when destroying aeq/ceq
+ */
+static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
+ struct i40iw_msix_vector *msix_vec,
+ void *dev_id)
+{
+ if (dev->is_pf)
+ i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
+ else
+ i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
+ synchronize_irq(msix_vec->irq);
+ free_irq(msix_vec->irq, dev_id);
+}
+
+/**
+ * i40iw_destroy_aeq - destroy aeq
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Issue a destroy aeq request and
+ * free the resources associated with the aeq
+ * The function is called during driver unload
+ */
+static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
+{
+ enum i40iw_status_code status = I40IW_ERR_NOT_READY;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_aeq *aeq = &iwdev->aeq;
+
+ if (!iwdev->msix_shared)
+ i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
+ if (reset)
+ goto exit;
+
+ if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
+ status = dev->aeq_ops->aeq_destroy_done(&aeq->sc_aeq);
+ if (status)
+ i40iw_pr_err("destroy aeq failed %d\n", status);
+
+exit:
+ i40iw_free_dma_mem(dev->hw, &aeq->mem);
+}
+
+/**
+ * i40iw_destroy_ceq - destroy ceq
+ * @iwdev: iwarp device
+ * @iwceq: ceq to be destroyed
+ * @reset: true if called before reset
+ *
+ * Issue a destroy ceq request and
+ * free the resources associated with the ceq
+ */
+static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
+ struct i40iw_ceq *iwceq,
+ bool reset)
+{
+ enum i40iw_status_code status;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+ if (reset)
+ goto exit;
+
+ status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
+ if (status) {
+ i40iw_pr_err("ceq destroy command failed %d\n", status);
+ goto exit;
+ }
+
+ status = dev->ceq_ops->cceq_destroy_done(&iwceq->sc_ceq);
+ if (status)
+ i40iw_pr_err("ceq destroy completion failed %d\n", status);
+exit:
+ i40iw_free_dma_mem(dev->hw, &iwceq->mem);
+}
+
+/**
+ * i40iw_dele_ceqs - destroy all ceq's
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Go through all of the device ceq's and for each ceq
+ * disable the ceq interrupt and destroy the ceq
+ */
+static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
+{
+ u32 i = 0;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_ceq *iwceq = iwdev->ceqlist;
+ struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
+
+ if (iwdev->msix_shared) {
+ i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
+ i40iw_destroy_ceq(iwdev, iwceq, reset);
+ iwceq++;
+ i++;
+ }
+
+ for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
+ i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
+ i40iw_destroy_ceq(iwdev, iwceq, reset);
+ }
+}
+
+/**
+ * i40iw_destroy_ccq - destroy control cq
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Issue destroy ccq request and
+ * free the resources associated with the ccq
+ */
+static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_ccq *ccq = &iwdev->ccq;
+ enum i40iw_status_code status = 0;
+
+ if (!reset)
+ status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
+ if (status)
+ i40iw_pr_err("ccq destroy failed %d\n", status);
+ i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
+}
+
+/* types of hmc objects */
+static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = {
+ I40IW_HMC_IW_QP,
+ I40IW_HMC_IW_CQ,
+ I40IW_HMC_IW_HTE,
+ I40IW_HMC_IW_ARP,
+ I40IW_HMC_IW_APBVT_ENTRY,
+ I40IW_HMC_IW_MR,
+ I40IW_HMC_IW_XF,
+ I40IW_HMC_IW_XFFL,
+ I40IW_HMC_IW_Q1,
+ I40IW_HMC_IW_Q1FL,
+ I40IW_HMC_IW_TIMER,
+};
+
+/**
+ * i40iw_close_hmc_objects_type - delete hmc objects of a given type
+ * @iwdev: iwarp device
+ * @obj_type: the hmc object type to be deleted
+ * @is_pf: true if the function is PF otherwise false
+ * @reset: true if called before reset
+ */
+static void i40iw_close_hmc_objects_type(struct i40iw_sc_dev *dev,
+ enum i40iw_hmc_rsrc_type obj_type,
+ struct i40iw_hmc_info *hmc_info,
+ bool is_pf,
+ bool reset)
+{
+ struct i40iw_hmc_del_obj_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.hmc_info = hmc_info;
+ info.rsrc_type = obj_type;
+ info.count = hmc_info->hmc_obj[obj_type].cnt;
+ info.is_pf = is_pf;
+ if (dev->hmc_ops->del_hmc_object(dev, &info, reset))
+ i40iw_pr_err("del obj of type %d failed\n", obj_type);
+}
+
+/**
+ * i40iw_del_hmc_objects - remove all device hmc objects
+ * @dev: iwarp device
+ * @hmc_info: hmc_info to free
+ * @is_pf: true if hmc_info belongs to PF, not vf nor allocated
+ * by PF on behalf of VF
+ * @reset: true if called before reset
+ */
+static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_info *hmc_info,
+ bool is_pf,
+ bool reset)
+{
+ unsigned int i;
+
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++)
+ i40iw_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, is_pf, reset);
+}
+
+/**
+ * i40iw_ceq_handler - interrupt handler for ceq
+ * @data: ceq pointer
+ */
+static irqreturn_t i40iw_ceq_handler(int irq, void *data)
+{
+ struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+
+ if (iwceq->irq != irq)
+ i40iw_pr_err("expected irq = %d received irq = %d\n", iwceq->irq, irq);
+ tasklet_schedule(&iwceq->dpc_tasklet);
+ return IRQ_HANDLED;
+}
+
+/**
+ * i40iw_create_hmc_obj_type - create hmc object of a given type
+ * @dev: hardware control device structure
+ * @info: information for the hmc object to create
+ */
+static enum i40iw_status_code i40iw_create_hmc_obj_type(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_create_obj_info *info)
+{
+ return dev->hmc_ops->create_hmc_object(dev, info);
+}
+
+/**
+ * i40iw_create_hmc_objs - create all hmc objects for the device
+ * @iwdev: iwarp device
+ * @is_pf: true if the function is PF otherwise false
+ *
+ * Create the device hmc objects and allocate hmc pages
+ * Return 0 if successful, otherwise clean up and return error
+ */
+static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
+ bool is_pf)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_hmc_create_obj_info info;
+ enum i40iw_status_code status;
+ int i;
+
+ memset(&info, 0, sizeof(info));
+ info.hmc_info = dev->hmc_info;
+ info.is_pf = is_pf;
+ info.entry_type = iwdev->sd_type;
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+ info.rsrc_type = iw_hmc_obj_types[i];
+ info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+ status = i40iw_create_hmc_obj_type(dev, &info);
+ if (status) {
+ i40iw_pr_err("create obj type %d status = %d\n",
+ iw_hmc_obj_types[i], status);
+ break;
+ }
+ }
+ if (!status)
+ return (dev->cqp_misc_ops->static_hmc_pages_allocated(dev->cqp, 0,
+ dev->hmc_fn_id,
+ true, true));
+
+ while (i) {
+ i--;
+ /* destroy the hmc objects of a given type */
+ i40iw_close_hmc_objects_type(dev,
+ iw_hmc_obj_types[i],
+ dev->hmc_info,
+ is_pf,
+ false);
+ }
+ return status;
+}
+
+/**
+ * i40iw_obj_aligned_mem - get aligned memory from device allocated memory
+ * @iwdev: iwarp device
+ * @memptr: points to the memory addresses
+ * @size: size of memory needed
+ * @mask: mask for the aligned memory
+ *
+ * Get aligned memory of the requested size and
+ * update the memptr to point to the new aligned memory
+ * Return 0 if successful, otherwise return no memory error
+ */
+enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
+ struct i40iw_dma_mem *memptr,
+ u32 size,
+ u32 mask)
+{
+ unsigned long va, newva;
+ unsigned long extra;
+
+ va = (unsigned long)iwdev->obj_next.va;
+ newva = va;
+ if (mask)
+ newva = ALIGN(va, (mask + 1));
+ extra = newva - va;
+ memptr->va = (u8 *)va + extra;
+ memptr->pa = iwdev->obj_next.pa + extra;
+ memptr->size = size;
+ if ((memptr->va + size) > (iwdev->obj_mem.va + iwdev->obj_mem.size))
+ return I40IW_ERR_NO_MEMORY;
+
+ iwdev->obj_next.va = memptr->va + size;
+ iwdev->obj_next.pa = memptr->pa + size;
+ return 0;
+}
+
+/**
+ * i40iw_create_cqp - create control qp
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the cqp and all the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
+{
+ enum i40iw_status_code status;
+ u32 sqsize = I40IW_CQP_SW_SQSIZE_2048;
+ struct i40iw_dma_mem mem;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_cqp_init_info cqp_init_info;
+ struct i40iw_cqp *cqp = &iwdev->cqp;
+ u16 maj_err, min_err;
+ int i;
+
+ cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
+ if (!cqp->cqp_requests)
+ return I40IW_ERR_NO_MEMORY;
+ cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
+ if (!cqp->scratch_array) {
+ kfree(cqp->cqp_requests);
+ return I40IW_ERR_NO_MEMORY;
+ }
+ dev->cqp = &cqp->sc_cqp;
+ dev->cqp->dev = dev;
+ memset(&cqp_init_info, 0, sizeof(cqp_init_info));
+ status = i40iw_allocate_dma_mem(dev->hw, &cqp->sq,
+ (sizeof(struct i40iw_cqp_sq_wqe) * sqsize),
+ I40IW_CQP_ALIGNMENT);
+ if (status)
+ goto exit;
+ status = i40iw_obj_aligned_mem(iwdev, &mem, sizeof(struct i40iw_cqp_ctx),
+ I40IW_HOST_CTX_ALIGNMENT_MASK);
+ if (status)
+ goto exit;
+ dev->cqp->host_ctx_pa = mem.pa;
+ dev->cqp->host_ctx = mem.va;
+ /* populate the cqp init info */
+ cqp_init_info.dev = dev;
+ cqp_init_info.sq_size = sqsize;
+ cqp_init_info.sq = cqp->sq.va;
+ cqp_init_info.sq_pa = cqp->sq.pa;
+ cqp_init_info.host_ctx_pa = mem.pa;
+ cqp_init_info.host_ctx = mem.va;
+ cqp_init_info.hmc_profile = iwdev->resource_profile;
+ cqp_init_info.enabled_vf_count = iwdev->max_rdma_vfs;
+ cqp_init_info.scratch_array = cqp->scratch_array;
+ status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
+ if (status) {
+ i40iw_pr_err("cqp init status %d maj_err %d min_err %d\n",
+ status, maj_err, min_err);
+ goto exit;
+ }
+ status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
+ if (status) {
+ i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
+ status, maj_err, min_err);
+ goto exit;
+ }
+ spin_lock_init(&cqp->req_lock);
+ INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
+ INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
+ /* init the waitq of the cqp_requests and add them to the list */
+ for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) {
+ init_waitqueue_head(&cqp->cqp_requests[i].waitq);
+ list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
+ }
+ return 0;
+exit:
+ /* clean up the created resources */
+ i40iw_destroy_cqp(iwdev, false);
+ return status;
+}
+
+/**
+ * i40iw_create_ccq - create control cq
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the ccq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_ccq(struct i40iw_device *iwdev)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_dma_mem mem;
+ enum i40iw_status_code status;
+ struct i40iw_ccq_init_info info;
+ struct i40iw_ccq *ccq = &iwdev->ccq;
+
+ memset(&info, 0, sizeof(info));
+ dev->ccq = &ccq->sc_cq;
+ dev->ccq->dev = dev;
+ info.dev = dev;
+ ccq->shadow_area.size = sizeof(struct i40iw_cq_shadow_area);
+ ccq->mem_cq.size = sizeof(struct i40iw_cqe) * IW_CCQ_SIZE;
+ status = i40iw_allocate_dma_mem(dev->hw, &ccq->mem_cq,
+ ccq->mem_cq.size, I40IW_CQ0_ALIGNMENT);
+ if (status)
+ goto exit;
+ status = i40iw_obj_aligned_mem(iwdev, &mem, ccq->shadow_area.size,
+ I40IW_SHADOWAREA_MASK);
+ if (status)
+ goto exit;
+ ccq->sc_cq.back_cq = (void *)ccq;
+ /* populate the ccq init info */
+ info.cq_base = ccq->mem_cq.va;
+ info.cq_pa = ccq->mem_cq.pa;
+ info.num_elem = IW_CCQ_SIZE;
+ info.shadow_area = mem.va;
+ info.shadow_area_pa = mem.pa;
+ info.ceqe_mask = false;
+ info.ceq_id_valid = true;
+ info.shadow_read_threshold = 16;
+ status = dev->ccq_ops->ccq_init(dev->ccq, &info);
+ if (!status)
+ status = dev->ccq_ops->ccq_create(dev->ccq, 0, true, true);
+exit:
+ if (status)
+ i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
+ return status;
+}
+
+/**
+ * i40iw_configure_ceq_vector - set up the msix interrupt vector for ceq
+ * @iwdev: iwarp device
+ * @msix_vec: interrupt vector information
+ * @iwceq: ceq associated with the vector
+ * @ceq_id: the id number of the iwceq
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iwdev,
+ struct i40iw_ceq *iwceq,
+ u32 ceq_id,
+ struct i40iw_msix_vector *msix_vec)
+{
+ enum i40iw_status_code status;
+
+ if (iwdev->msix_shared && !ceq_id) {
+ tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+ status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
+ } else {
+ tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
+ status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
+ }
+
+ if (status) {
+ i40iw_pr_err("ceq irq config fail\n");
+ return I40IW_ERR_CONFIG;
+ }
+ msix_vec->ceq_id = ceq_id;
+ msix_vec->cpu_affinity = 0;
+
+ return 0;
+}
+
+/**
+ * i40iw_create_ceq - create completion event queue
+ * @iwdev: iwarp device
+ * @iwceq: pointer to the ceq resources to be created
+ * @ceq_id: the id number of the iwceq
+ *
+ * Return 0, if the ceq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_ceq(struct i40iw_device *iwdev,
+ struct i40iw_ceq *iwceq,
+ u32 ceq_id)
+{
+ enum i40iw_status_code status;
+ struct i40iw_ceq_init_info info;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ u64 scratch;
+
+ memset(&info, 0, sizeof(info));
+ info.ceq_id = ceq_id;
+ iwceq->iwdev = iwdev;
+ iwceq->mem.size = sizeof(struct i40iw_ceqe) *
+ iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+ status = i40iw_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size,
+ I40IW_CEQ_ALIGNMENT);
+ if (status)
+ goto exit;
+ info.ceq_id = ceq_id;
+ info.ceqe_base = iwceq->mem.va;
+ info.ceqe_pa = iwceq->mem.pa;
+
+ info.elem_cnt = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+ iwceq->sc_ceq.ceq_id = ceq_id;
+ info.dev = dev;
+ scratch = (uintptr_t)&iwdev->cqp.sc_cqp;
+ status = dev->ceq_ops->ceq_init(&iwceq->sc_ceq, &info);
+ if (!status)
+ status = dev->ceq_ops->cceq_create(&iwceq->sc_ceq, scratch);
+
+exit:
+ if (status)
+ i40iw_free_dma_mem(dev->hw, &iwceq->mem);
+ return status;
+}
+
+void i40iw_request_reset(struct i40iw_device *iwdev)
+{
+ struct i40e_info *ldev = iwdev->ldev;
+
+ ldev->ops->request_reset(ldev, iwdev->client, 1);
+}
+
+/**
+ * i40iw_setup_ceqs - manage the device ceq's and their interrupt resources
+ * @iwdev: iwarp device
+ * @ldev: i40e lan device
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceq's and configure their msix interrupt vectors
+ * Return 0, if at least one ceq is successfully set up, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
+ struct i40e_info *ldev)
+{
+ u32 i;
+ u32 ceq_id;
+ struct i40iw_ceq *iwceq;
+ struct i40iw_msix_vector *msix_vec;
+ enum i40iw_status_code status = 0;
+ u32 num_ceqs;
+
+ if (ldev && ldev->ops && ldev->ops->setup_qvlist) {
+ status = ldev->ops->setup_qvlist(ldev, &i40iw_client,
+ iwdev->iw_qvlist);
+ if (status)
+ goto exit;
+ } else {
+ status = I40IW_ERR_BAD_PTR;
+ goto exit;
+ }
+
+ num_ceqs = min(iwdev->msix_count, iwdev->sc_dev.hmc_fpm_misc.max_ceqs);
+ iwdev->ceqlist = kcalloc(num_ceqs, sizeof(*iwdev->ceqlist), GFP_KERNEL);
+ if (!iwdev->ceqlist) {
+ status = I40IW_ERR_NO_MEMORY;
+ goto exit;
+ }
+ i = (iwdev->msix_shared) ? 0 : 1;
+ for (ceq_id = 0; i < num_ceqs; i++, ceq_id++) {
+ iwceq = &iwdev->ceqlist[ceq_id];
+ status = i40iw_create_ceq(iwdev, iwceq, ceq_id);
+ if (status) {
+ i40iw_pr_err("create ceq status = %d\n", status);
+ break;
+ }
+
+ msix_vec = &iwdev->iw_msixtbl[i];
+ iwceq->irq = msix_vec->irq;
+ iwceq->msix_idx = msix_vec->idx;
+ status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
+ if (status) {
+ i40iw_destroy_ceq(iwdev, iwceq, false);
+ break;
+ }
+ i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
+ iwdev->ceqs_count++;
+ }
+
+exit:
+ if (status) {
+ if (!iwdev->ceqs_count) {
+ kfree(iwdev->ceqlist);
+ iwdev->ceqlist = NULL;
+ } else {
+ status = 0;
+ }
+ }
+ return status;
+}
+
+/**
+ * i40iw_configure_aeq_vector - set up the msix vector for aeq
+ * @iwdev: iwarp device
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iwdev)
+{
+ struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
+ u32 ret = 0;
+
+ if (!iwdev->msix_shared) {
+ tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+ ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
+ }
+ if (ret) {
+ i40iw_pr_err("aeq irq config fail\n");
+ return I40IW_ERR_CONFIG;
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_create_aeq - create async event queue
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the aeq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_aeq(struct i40iw_device *iwdev)
+{
+ enum i40iw_status_code status;
+ struct i40iw_aeq_init_info info;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_aeq *aeq = &iwdev->aeq;
+ u64 scratch = 0;
+ u32 aeq_size;
+
+ aeq_size = 2 * iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt +
+ iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+ memset(&info, 0, sizeof(info));
+ aeq->mem.size = sizeof(struct i40iw_sc_aeqe) * aeq_size;
+ status = i40iw_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size,
+ I40IW_AEQ_ALIGNMENT);
+ if (status)
+ goto exit;
+
+ info.aeqe_base = aeq->mem.va;
+ info.aeq_elem_pa = aeq->mem.pa;
+ info.elem_cnt = aeq_size;
+ info.dev = dev;
+ status = dev->aeq_ops->aeq_init(&aeq->sc_aeq, &info);
+ if (status)
+ goto exit;
+ status = dev->aeq_ops->aeq_create(&aeq->sc_aeq, scratch, 1);
+ if (!status)
+ status = dev->aeq_ops->aeq_create_done(&aeq->sc_aeq);
+exit:
+ if (status)
+ i40iw_free_dma_mem(dev->hw, &aeq->mem);
+ return status;
+}
+
+/**
+ * i40iw_setup_aeq - set up the device aeq
+ * @iwdev: iwarp device
+ *
+ * Create the aeq and configure its msix interrupt vector
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ enum i40iw_status_code status;
+
+ status = i40iw_create_aeq(iwdev);
+ if (status)
+ return status;
+
+ status = i40iw_configure_aeq_vector(iwdev);
+ if (status) {
+ i40iw_destroy_aeq(iwdev, false);
+ return status;
+ }
+
+ if (!iwdev->msix_shared)
+ i40iw_enable_intr(dev, iwdev->iw_msixtbl[0].idx);
+ return 0;
+}
+
+/**
+ * i40iw_initialize_ilq - create iwarp local queue for cm
+ * @iwdev: iwarp device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
+{
+ struct i40iw_puda_rsrc_info info;
+ enum i40iw_status_code status;
+
+ info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
+ info.cq_id = 1;
+ info.qp_id = 0;
+ info.count = 1;
+ info.pd_id = 1;
+ info.sq_size = 8192;
+ info.rq_size = 8192;
+ info.buf_size = 1024;
+ info.tx_buf_cnt = 16384;
+ info.mss = iwdev->mss;
+ info.receive = i40iw_receive_ilq;
+ info.xmit_complete = i40iw_free_sqbuf;
+ status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+ if (status)
+ i40iw_pr_err("ilq create fail\n");
+ return status;
+}
+
+/**
+ * i40iw_initialize_ieq - create iwarp exception queue
+ * @iwdev: iwarp device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
+{
+ struct i40iw_puda_rsrc_info info;
+ enum i40iw_status_code status;
+
+ info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
+ info.cq_id = 2;
+ info.qp_id = iwdev->sc_dev.exception_lan_queue;
+ info.count = 1;
+ info.pd_id = 2;
+ info.sq_size = 8192;
+ info.rq_size = 8192;
+ info.buf_size = 2048;
+ info.mss = iwdev->mss;
+ info.tx_buf_cnt = 16384;
+ status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+ if (status)
+ i40iw_pr_err("ieq create fail\n");
+ return status;
+}
+
+/**
+ * i40iw_hmc_setup - create hmc objects for the device
+ * @iwdev: iwarp device
+ *
+ * Set up the device private memory space for the number and size of
+ * the hmc objects and create the objects
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_hmc_setup(struct i40iw_device *iwdev)
+{
+ enum i40iw_status_code status;
+
+ iwdev->sd_type = I40IW_SD_TYPE_DIRECT;
+ status = i40iw_config_fpm_values(&iwdev->sc_dev, IW_CFG_FPM_QP_COUNT);
+ if (status)
+ goto exit;
+ status = i40iw_create_hmc_objs(iwdev, true);
+ if (status)
+ goto exit;
+ iwdev->init_state = HMC_OBJS_CREATED;
+exit:
+ return status;
+}
+
+/**
+ * i40iw_del_init_mem - deallocate memory resources
+ * @iwdev: iwarp device
+ */
+static void i40iw_del_init_mem(struct i40iw_device *iwdev)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+ i40iw_free_dma_mem(&iwdev->hw, &iwdev->obj_mem);
+ kfree(dev->hmc_info->sd_table.sd_entry);
+ dev->hmc_info->sd_table.sd_entry = NULL;
+ kfree(iwdev->mem_resources);
+ iwdev->mem_resources = NULL;
+ kfree(iwdev->ceqlist);
+ iwdev->ceqlist = NULL;
+ kfree(iwdev->iw_msixtbl);
+ iwdev->iw_msixtbl = NULL;
+ kfree(iwdev->hmc_info_mem);
+ iwdev->hmc_info_mem = NULL;
+}
+
+/**
+ * i40iw_del_macip_entry - remove a mac ip address entry from the hw table
+ * @iwdev: iwarp device
+ * @idx: the index of the mac ip address to delete
+ */
+static void i40iw_del_macip_entry(struct i40iw_device *iwdev, u8 idx)
+{
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status = 0;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request) {
+ i40iw_pr_err("cqp_request memory failed\n");
+ return;
+ }
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_DELETE_LOCAL_MAC_IPADDR_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.del_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.del_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.del_local_mac_ipaddr_entry.entry_idx = idx;
+ cqp_info->in.u.del_local_mac_ipaddr_entry.ignore_ref_count = 0;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Del MAC Ip entry fail");
+}
+
+/**
+ * i40iw_add_mac_ipaddr_entry - add a mac ip address entry to the hw table
+ * @iwdev: iwarp device
+ * @mac_addr: pointer to mac address
+ * @idx: the index of the mac ip address to add
+ */
+static enum i40iw_status_code i40iw_add_mac_ipaddr_entry(struct i40iw_device *iwdev,
+ u8 *mac_addr,
+ u8 idx)
+{
+ struct i40iw_local_mac_ipaddr_entry_info *info;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status = 0;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request) {
+ i40iw_pr_err("cqp_request memory failed\n");
+ return I40IW_ERR_NO_MEMORY;
+ }
+
+ cqp_info = &cqp_request->info;
+
+ cqp_info->post_sq = 1;
+ info = &cqp_info->in.u.add_local_mac_ipaddr_entry.info;
+ ether_addr_copy(info->mac_addr, mac_addr);
+ info->entry_idx = idx;
+ cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->cqp_cmd = OP_ADD_LOCAL_MAC_IPADDR_ENTRY;
+ cqp_info->in.u.add_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Add MAC Ip entry fail");
+ return status;
+}
+
+/**
+ * i40iw_alloc_local_mac_ipaddr_entry - allocate a mac ip address entry
+ * @iwdev: iwarp device
+ * @mac_ip_tbl_idx: the index of the new mac ip address
+ *
+ * Allocate a mac ip address entry and update the mac_ip_tbl_idx
+ * to hold the index of the newly created mac ip address
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_alloc_local_mac_ipaddr_entry(struct i40iw_device *iwdev,
+ u16 *mac_ip_tbl_idx)
+{
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status = 0;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request) {
+ i40iw_pr_err("cqp_request memory failed\n");
+ return I40IW_ERR_NO_MEMORY;
+ }
+
+ /* increment refcount, because we need the cqp request ret value */
+ atomic_inc(&cqp_request->refcount);
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.alloc_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (!status)
+ *mac_ip_tbl_idx = cqp_request->compl_info.op_ret_val;
+ else
+ i40iw_pr_err("CQP-OP Alloc MAC Ip entry fail");
+ /* decrement refcount and free the cqp request, if no longer used */
+ i40iw_put_cqp_request(iwcqp, cqp_request);
+ return status;
+}
+
+/**
+ * i40iw_alloc_set_mac_ipaddr - set up a mac ip address table entry
+ * @iwdev: iwarp device
+ * @macaddr: pointer to mac address
+ *
+ * Allocate a mac ip address entry and add it to the hw table
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iwdev,
+ u8 *macaddr)
+{
+ enum i40iw_status_code status;
+
+ status = i40iw_alloc_local_mac_ipaddr_entry(iwdev, &iwdev->mac_ip_table_idx);
+ if (!status) {
+ status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
+ (u8)iwdev->mac_ip_table_idx);
+ if (!status)
+ status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
+ (u8)iwdev->mac_ip_table_idx);
+ else
+ i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+ }
+ return status;
+}
+
+/**
+ * i40iw_add_ipv6_addr - add ipv6 address to the hw arp table
+ * @iwdev: iwarp device
+ */
+static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp;
+ __be32 local_ipaddr6[4];
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, ip_dev) {
+ if ((((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF) &&
+ (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
+ (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ i40iw_pr_err("ipv6 inet device not found\n");
+ break;
+ }
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
+ rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
+ i40iw_copy_ip_ntohl(local_ipaddr6,
+ ifp->addr.in6_u.u6_addr32);
+ i40iw_manage_arp_cache(iwdev,
+ ip_dev->dev_addr,
+ local_ipaddr6,
+ false,
+ I40IW_ARP_ADD);
+ }
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * i40iw_add_ipv4_addr - add ipv4 address to the hw arp table
+ * @iwdev: iwarp device
+ */
+static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
+{
+ struct net_device *dev;
+ struct in_device *idev;
+ bool got_lock = true;
+ u32 ip_addr;
+
+ if (!rtnl_trylock())
+ got_lock = false;
+
+ for_each_netdev(&init_net, dev) {
+ if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) &&
+ (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
+ (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+ idev = in_dev_get(dev);
+ for_ifa(idev) {
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+ "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
+ rdma_vlan_dev_vlan_id(dev), dev->dev_addr);
+
+ ip_addr = ntohl(ifa->ifa_address);
+ i40iw_manage_arp_cache(iwdev,
+ dev->dev_addr,
+ &ip_addr,
+ true,
+ I40IW_ARP_ADD);
+ }
+ endfor_ifa(idev);
+ in_dev_put(idev);
+ }
+ }
+ if (got_lock)
+ rtnl_unlock();
+}
+
+/**
+ * i40iw_add_mac_ip - add mac and ip addresses
+ * @iwdev: iwarp device
+ *
+ * Create and add a mac ip address entry to the hw table and
+ * ipv4/ipv6 addresses to the arp cache
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_add_mac_ip(struct i40iw_device *iwdev)
+{
+ struct net_device *netdev = iwdev->netdev;
+ enum i40iw_status_code status;
+
+ status = i40iw_alloc_set_mac_ipaddr(iwdev, (u8 *)netdev->dev_addr);
+ if (status)
+ return status;
+ i40iw_add_ipv4_addr(iwdev);
+ i40iw_add_ipv6_addr(iwdev);
+ return 0;
+}
+
+/**
+ * i40iw_wait_pe_ready - Check if firmware is ready
+ * @hw: provides access to registers
+ */
+static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
+{
+ u32 statusfw;
+ u32 statuscpu0;
+ u32 statuscpu1;
+ u32 statuscpu2;
+ u32 retrycount = 0;
+
+ do {
+ statusfw = i40iw_rd32(hw, I40E_GLPE_FWLDSTATUS);
+ i40iw_pr_info("[%04d] fm load status[x%04X]\n", __LINE__, statusfw);
+ statuscpu0 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS0);
+ i40iw_pr_info("[%04d] CSR_CQP status[x%04X]\n", __LINE__, statuscpu0);
+ statuscpu1 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS1);
+ i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS1 status[x%04X]\n",
+ __LINE__, statuscpu1);
+ statuscpu2 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS2);
+ i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS2 status[x%04X]\n",
+ __LINE__, statuscpu2);
+ if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
+ break; /* SUCCESS */
+ mdelay(1000);
+ retrycount++;
+ } while (retrycount < 14);
+ i40iw_wr32(hw, 0xb4040, 0x4C104C5);
+}
+
+/**
+ * i40iw_initialize_dev - initialize device
+ * @iwdev: iwarp device
+ * @ldev: lan device information
+ *
+ * Allocate memory for the hmc objects and initialize iwdev
+ * Return 0 if successful, otherwise clean up the resources
+ * and return error
+ */
+static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
+ struct i40e_info *ldev)
+{
+ enum i40iw_status_code status;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_device_init_info info;
+ struct i40iw_dma_mem mem;
+ u32 size;
+
+ memset(&info, 0, sizeof(info));
+ size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
+ (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
+ iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
+ if (!iwdev->hmc_info_mem) {
+ i40iw_pr_err("memory alloc fail\n");
+ return I40IW_ERR_NO_MEMORY;
+ }
+ iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
+ dev->hmc_info = &iwdev->hw.hmc;
+ dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
+ status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
+ I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
+ if (status)
+ goto exit;
+ info.fpm_query_buf_pa = mem.pa;
+ info.fpm_query_buf = mem.va;
+ status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
+ I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
+ if (status)
+ goto exit;
+ info.fpm_commit_buf_pa = mem.pa;
+ info.fpm_commit_buf = mem.va;
+ info.hmc_fn_id = ldev->fid;
+ info.is_pf = (ldev->ftype) ? false : true;
+ info.bar0 = ldev->hw_addr;
+ info.hw = &iwdev->hw;
+ info.debug_mask = debug;
+ info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle;
+ info.exception_lan_queue = 1;
+ info.vchnl_send = i40iw_virtchnl_send;
+ status = i40iw_device_init(&iwdev->sc_dev, &info);
+exit:
+ if (status) {
+ kfree(iwdev->hmc_info_mem);
+ iwdev->hmc_info_mem = NULL;
+ }
+ return status;
+}
+
+/**
+ * i40iw_register_notifiers - register tcp ip notifiers
+ */
+static void i40iw_register_notifiers(void)
+{
+ if (!i40iw_notifiers_registered) {
+ register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+ register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+ register_netevent_notifier(&i40iw_net_notifier);
+ }
+ i40iw_notifiers_registered++;
+}
+
+/**
+ * i40iw_save_msix_info - copy msix vector information to iwarp device
+ * @iwdev: iwarp device
+ * @ldev: lan device information
+ *
+ * Allocate iwdev msix table and copy the ldev msix info to the table
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
+ struct i40e_info *ldev)
+{
+ struct i40e_qvlist_info *iw_qvlist;
+ struct i40e_qv_info *iw_qvinfo;
+ u32 ceq_idx;
+ u32 i;
+ u32 size;
+
+ iwdev->msix_count = ldev->msix_count;
+
+ size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
+ size += sizeof(struct i40e_qvlist_info);
+ size += sizeof(struct i40e_qv_info) * iwdev->msix_count - 1;
+ iwdev->iw_msixtbl = kzalloc(size, GFP_KERNEL);
+
+ if (!iwdev->iw_msixtbl)
+ return I40IW_ERR_NO_MEMORY;
+ iwdev->iw_qvlist = (struct i40e_qvlist_info *)(&iwdev->iw_msixtbl[iwdev->msix_count]);
+ iw_qvlist = iwdev->iw_qvlist;
+ iw_qvinfo = iw_qvlist->qv_info;
+ iw_qvlist->num_vectors = iwdev->msix_count;
+ if (iwdev->msix_count <= num_online_cpus())
+ iwdev->msix_shared = true;
+ for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
+ iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
+ iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
+ if (i == 0) {
+ iw_qvinfo->aeq_idx = 0;
+ if (iwdev->msix_shared)
+ iw_qvinfo->ceq_idx = ceq_idx++;
+ else
+ iw_qvinfo->ceq_idx = I40E_QUEUE_INVALID_IDX;
+ } else {
+ iw_qvinfo->aeq_idx = I40E_QUEUE_INVALID_IDX;
+ iw_qvinfo->ceq_idx = ceq_idx++;
+ }
+ iw_qvinfo->itr_idx = 3;
+ iw_qvinfo->v_idx = iwdev->iw_msixtbl[i].idx;
+ }
+ return 0;
+}
+
+/**
+ * i40iw_deinit_device - clean up the device resources
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ * @del_hdl: true if delete hdl entry
+ *
+ * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
+ * destroy the device queues and free the pble and the hmc objects
+ */
+static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl)
+{
+ struct i40e_info *ldev = iwdev->ldev;
+
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+ i40iw_pr_info("state = %d\n", iwdev->init_state);
+
+ switch (iwdev->init_state) {
+ case RDMA_DEV_REGISTERED:
+ iwdev->iw_status = 0;
+ i40iw_port_ibevent(iwdev);
+ i40iw_destroy_rdma_device(iwdev->iwibdev);
+ /* fallthrough */
+ case IP_ADDR_REGISTERED:
+ if (!reset)
+ i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+ /* fallthrough */
+ case INET_NOTIFIER:
+ if (i40iw_notifiers_registered > 0) {
+ i40iw_notifiers_registered--;
+ unregister_netevent_notifier(&i40iw_net_notifier);
+ unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+ unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+ }
+ /* fallthrough */
+ case CEQ_CREATED:
+ i40iw_dele_ceqs(iwdev, reset);
+ /* fallthrough */
+ case AEQ_CREATED:
+ i40iw_destroy_aeq(iwdev, reset);
+ /* fallthrough */
+ case IEQ_CREATED:
+ i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+ /* fallthrough */
+ case ILQ_CREATED:
+ i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+ /* fallthrough */
+ case CCQ_CREATED:
+ i40iw_destroy_ccq(iwdev, reset);
+ /* fallthrough */
+ case PBLE_CHUNK_MEM:
+ i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
+ /* fallthrough */
+ case HMC_OBJS_CREATED:
+ i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
+ /* fallthrough */
+ case CQP_CREATED:
+ i40iw_destroy_cqp(iwdev, !reset);
+ /* fallthrough */
+ case INITIAL_STATE:
+ i40iw_cleanup_cm_core(&iwdev->cm_core);
+ if (dev->is_pf)
+ i40iw_hw_stats_del_timer(dev);
+
+ i40iw_del_init_mem(iwdev);
+ break;
+ case INVALID_STATE:
+ /* fallthrough */
+ default:
+ i40iw_pr_err("bad init_state = %d\n", iwdev->init_state);
+ break;
+ }
+
+ if (del_hdl)
+ i40iw_del_handler(i40iw_find_i40e_handler(ldev));
+ kfree(iwdev->hdl);
+}
+
+/**
+ * i40iw_setup_init_state - set up the initial device struct
+ * @hdl: handler for iwarp device - one per instance
+ * @ldev: lan device information
+ * @client: iwarp client information, provided during registration
+ *
+ * Initialize the iwarp device and its hdl information
+ * using the ldev and client information
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
+ struct i40e_info *ldev,
+ struct i40e_client *client)
+{
+ struct i40iw_device *iwdev = &hdl->device;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ enum i40iw_status_code status;
+
+ memcpy(&hdl->ldev, ldev, sizeof(*ldev));
+ if (resource_profile == 1)
+ resource_profile = 2;
+
+ iwdev->mpa_version = mpa_version;
+ iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
+ (u8)resource_profile + I40IW_HMC_PROFILE_DEFAULT :
+ I40IW_HMC_PROFILE_DEFAULT;
+ iwdev->max_rdma_vfs =
+ (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0;
+ iwdev->netdev = ldev->netdev;
+ hdl->client = client;
+ iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
+ if (!ldev->ftype)
+ iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
+ else
+ iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_VF_DB_ADDR_OFFSET;
+
+ status = i40iw_save_msix_info(iwdev, ldev);
+ if (status)
+ goto exit;
+ iwdev->hw.dev_context = (void *)ldev->pcidev;
+ iwdev->hw.hw_addr = ldev->hw_addr;
+ status = i40iw_allocate_dma_mem(&iwdev->hw,
+ &iwdev->obj_mem, 8192, 4096);
+ if (status)
+ goto exit;
+ iwdev->obj_next = iwdev->obj_mem;
+ iwdev->push_mode = push_mode;
+ init_waitqueue_head(&iwdev->vchnl_waitq);
+ status = i40iw_initialize_dev(iwdev, ldev);
+exit:
+ if (status) {
+ kfree(iwdev->iw_msixtbl);
+ i40iw_free_dma_mem(dev->hw, &iwdev->obj_mem);
+ iwdev->iw_msixtbl = NULL;
+ }
+ return status;
+}
+
+/**
+ * i40iw_open - client interface operation open for iwarp/uda device
+ * @ldev: lan device information
+ * @client: iwarp client information, provided during registration
+ *
+ * Called by the lan driver during the processing of client register
+ * Create device resources, set up queues, pble and hmc objects and
+ * register the device with the ib verbs interface
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
+{
+ struct i40iw_device *iwdev;
+ struct i40iw_sc_dev *dev;
+ enum i40iw_status_code status;
+ struct i40iw_handler *hdl;
+
+ hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
+ if (!hdl)
+ return -ENOMEM;
+ iwdev = &hdl->device;
+ iwdev->hdl = hdl;
+ dev = &iwdev->sc_dev;
+ i40iw_setup_cm_core(iwdev);
+
+ dev->back_dev = (void *)iwdev;
+ iwdev->ldev = &hdl->ldev;
+ iwdev->client = client;
+ mutex_init(&iwdev->pbl_mutex);
+ i40iw_add_handler(hdl);
+
+ do {
+ status = i40iw_setup_init_state(hdl, ldev, client);
+ if (status)
+ break;
+ iwdev->init_state = INITIAL_STATE;
+ if (dev->is_pf)
+ i40iw_wait_pe_ready(dev->hw);
+ status = i40iw_create_cqp(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = CQP_CREATED;
+ status = i40iw_hmc_setup(iwdev);
+ if (status)
+ break;
+ status = i40iw_create_ccq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = CCQ_CREATED;
+ status = i40iw_initialize_ilq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = ILQ_CREATED;
+ status = i40iw_initialize_ieq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = IEQ_CREATED;
+ status = i40iw_setup_aeq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = AEQ_CREATED;
+ status = i40iw_setup_ceqs(iwdev, ldev);
+ if (status)
+ break;
+ iwdev->init_state = CEQ_CREATED;
+ status = i40iw_initialize_hw_resources(iwdev);
+ if (status)
+ break;
+ dev->ccq_ops->ccq_arm(dev->ccq);
+ status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
+ if (status)
+ break;
+ iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch");
+ i40iw_register_notifiers();
+ iwdev->init_state = INET_NOTIFIER;
+ status = i40iw_add_mac_ip(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = IP_ADDR_REGISTERED;
+ if (i40iw_register_rdma_device(iwdev)) {
+ i40iw_pr_err("register rdma device fail\n");
+ break;
+ };
+
+ iwdev->init_state = RDMA_DEV_REGISTERED;
+ iwdev->iw_status = 1;
+ i40iw_port_ibevent(iwdev);
+ i40iw_pr_info("i40iw_open completed\n");
+ return 0;
+ } while (0);
+
+ i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
+ i40iw_deinit_device(iwdev, false, false);
+ return -ERESTART;
+}
+
+/**
+ * i40iw_l2param_change : handle qs handles for qos and mss change
+ * @ldev: lan device information
+ * @client: client for paramater change
+ * @params: new parameters from L2
+ */
+static void i40iw_l2param_change(struct i40e_info *ldev,
+ struct i40e_client *client,
+ struct i40e_params *params)
+{
+ struct i40iw_handler *hdl;
+ struct i40iw_device *iwdev;
+
+ hdl = i40iw_find_i40e_handler(ldev);
+ if (!hdl)
+ return;
+
+ iwdev = &hdl->device;
+ if (params->mtu)
+ iwdev->mss = params->mtu - I40IW_MTU_TO_MSS;
+}
+
+/**
+ * i40iw_close - client interface operation close for iwarp/uda device
+ * @ldev: lan device information
+ * @client: client to close
+ *
+ * Called by the lan driver during the processing of client unregister
+ * Destroy and clean up the driver resources
+ */
+static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool reset)
+{
+ struct i40iw_device *iwdev;
+ struct i40iw_handler *hdl;
+
+ hdl = i40iw_find_i40e_handler(ldev);
+ if (!hdl)
+ return;
+
+ iwdev = &hdl->device;
+ destroy_workqueue(iwdev->virtchnl_wq);
+ i40iw_deinit_device(iwdev, reset, true);
+}
+
+/**
+ * i40iw_vf_reset - process VF reset
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id
+ *
+ * Called when a VF is reset by the PF
+ * Destroy and clean up the VF resources
+ */
+static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u32 vf_id)
+{
+ struct i40iw_handler *hdl;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_hmc_fcn_info hmc_fcn_info;
+ struct i40iw_virt_mem vf_dev_mem;
+ struct i40iw_vfdev *tmp_vfdev;
+ unsigned int i;
+ unsigned long flags;
+
+ hdl = i40iw_find_i40e_handler(ldev);
+ if (!hdl)
+ return;
+
+ dev = &hdl->device.sc_dev;
+
+ for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
+ if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
+ continue;
+
+ /* free all resources allocated on behalf of vf */
+ tmp_vfdev = dev->vf_dev[i];
+ spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+ dev->vf_dev[i] = NULL;
+ spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+ i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
+ /* remove vf hmc function */
+ memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
+ hmc_fcn_info.vf_id = vf_id;
+ hmc_fcn_info.iw_vf_idx = tmp_vfdev->iw_vf_idx;
+ hmc_fcn_info.free_fcn = true;
+ i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
+ /* free vf_dev */
+ vf_dev_mem.va = tmp_vfdev;
+ vf_dev_mem.size = sizeof(struct i40iw_vfdev) +
+ sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX;
+ i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
+ break;
+ }
+}
+
+/**
+ * i40iw_vf_enable - enable a number of VFs
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @num_vfs: number of VFs for the PF
+ *
+ * Called when the number of VFs changes
+ */
+static void i40iw_vf_enable(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 num_vfs)
+{
+ struct i40iw_handler *hdl;
+
+ hdl = i40iw_find_i40e_handler(ldev);
+ if (!hdl)
+ return;
+
+ if (num_vfs > I40IW_MAX_PE_ENABLED_VF_COUNT)
+ hdl->device.max_enabled_vfs = I40IW_MAX_PE_ENABLED_VF_COUNT;
+ else
+ hdl->device.max_enabled_vfs = num_vfs;
+}
+
+/**
+ * i40iw_vf_capable - check if VF capable
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id
+ *
+ * Return 1 if a VF slot is available or if VF is already RDMA enabled
+ * Return 0 otherwise
+ */
+static int i40iw_vf_capable(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 vf_id)
+{
+ struct i40iw_handler *hdl;
+ struct i40iw_sc_dev *dev;
+ unsigned int i;
+
+ hdl = i40iw_find_i40e_handler(ldev);
+ if (!hdl)
+ return 0;
+
+ dev = &hdl->device.sc_dev;
+
+ for (i = 0; i < hdl->device.max_enabled_vfs; i++) {
+ if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id == vf_id))
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_virtchnl_receive - receive a message through the virtual channel
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id associated with the message
+ * @msg: message buffer pointer
+ * @len: length of the message
+ *
+ * Invoke virtual channel receive operation for the given msg
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_virtchnl_receive(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 vf_id,
+ u8 *msg,
+ u16 len)
+{
+ struct i40iw_handler *hdl;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_device *iwdev;
+ int ret_code = I40IW_NOT_SUPPORTED;
+
+ if (!len || !msg)
+ return I40IW_ERR_PARAM;
+
+ hdl = i40iw_find_i40e_handler(ldev);
+ if (!hdl)
+ return I40IW_ERR_PARAM;
+
+ dev = &hdl->device.sc_dev;
+ iwdev = dev->back_dev;
+
+ i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len);
+
+ if (dev->vchnl_if.vchnl_recv) {
+ ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
+ if (!dev->is_pf) {
+ atomic_dec(&iwdev->vchnl_msgs);
+ wake_up(&iwdev->vchnl_waitq);
+ }
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_virtchnl_send - send a message through the virtual channel
+ * @dev: iwarp device
+ * @vf_id: virtual function id associated with the message
+ * @msg: virtual channel message buffer pointer
+ * @len: length of the message
+ *
+ * Invoke virtual channel send operation for the given msg
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ u8 *msg,
+ u16 len)
+{
+ struct i40iw_device *iwdev;
+ struct i40e_info *ldev;
+ enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR;
+
+ if (!dev || !dev->back_dev)
+ return ret_code;
+
+ iwdev = dev->back_dev;
+ ldev = iwdev->ldev;
+
+ if (ldev && ldev->ops && ldev->ops->virtchnl_send)
+ ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
+
+ return ret_code;
+}
+
+/* client interface functions */
+static struct i40e_client_ops i40e_ops = {
+ .open = i40iw_open,
+ .close = i40iw_close,
+ .l2_param_change = i40iw_l2param_change,
+ .virtchnl_receive = i40iw_virtchnl_receive,
+ .vf_reset = i40iw_vf_reset,
+ .vf_enable = i40iw_vf_enable,
+ .vf_capable = i40iw_vf_capable
+};
+
+/**
+ * i40iw_init_module - driver initialization function
+ *
+ * First function to call when the driver is loaded
+ * Register the driver as i40e client and port mapper client
+ */
+static int __init i40iw_init_module(void)
+{
+ int ret;
+
+ memset(&i40iw_client, 0, sizeof(i40iw_client));
+ i40iw_client.version.major = CLIENT_IW_INTERFACE_VERSION_MAJOR;
+ i40iw_client.version.minor = CLIENT_IW_INTERFACE_VERSION_MINOR;
+ i40iw_client.version.build = CLIENT_IW_INTERFACE_VERSION_BUILD;
+ i40iw_client.ops = &i40e_ops;
+ memcpy(i40iw_client.name, i40iw_client_name, I40E_CLIENT_STR_LENGTH);
+ i40iw_client.type = I40E_CLIENT_IWARP;
+ spin_lock_init(&i40iw_handler_lock);
+ ret = i40e_register_client(&i40iw_client);
+ return ret;
+}
+
+/**
+ * i40iw_exit_module - driver exit clean up function
+ *
+ * The function is called just before the driver is unloaded
+ * Unregister the driver as i40e client and port mapper client
+ */
+static void __exit i40iw_exit_module(void)
+{
+ i40e_unregister_client(&i40iw_client);
+}
+
+module_init(i40iw_init_module);
+module_exit(i40iw_exit_module);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
new file mode 100644
index 000000000000..7e20493510e8
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -0,0 +1,215 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_OSDEP_H
+#define I40IW_OSDEP_H
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <net/tcp.h>
+#include <crypto/hash.h>
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#define STATS_TIMER_DELAY 1000
+
+static inline void set_64bit_val(u64 *wqe_words, u32 byte_index, u64 value)
+{
+ wqe_words[byte_index >> 3] = value;
+}
+
+/**
+ * set_32bit_val - set 32 value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @value: value to write
+ **/
+static inline void set_32bit_val(u32 *wqe_words, u32 byte_index, u32 value)
+{
+ wqe_words[byte_index >> 2] = value;
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @value: read value
+ **/
+static inline void get_64bit_val(u64 *wqe_words, u32 byte_index, u64 *value)
+{
+ *value = wqe_words[byte_index >> 3];
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to reaad from
+ * @value: return 32 bit value
+ **/
+static inline void get_32bit_val(u32 *wqe_words, u32 byte_index, u32 *value)
+{
+ *value = wqe_words[byte_index >> 2];
+}
+
+struct i40iw_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+} __packed;
+
+struct i40iw_virt_mem {
+ void *va;
+ u32 size;
+} __packed;
+
+#define i40iw_debug(h, m, s, ...) \
+do { \
+ if (((m) & (h)->debug_mask)) \
+ pr_info("i40iw " s, ##__VA_ARGS__); \
+} while (0)
+
+#define i40iw_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
+
+#define I40E_GLHMC_VFSDCMD(_i) (0x000C8000 + ((_i) * 4)) \
+ /* _i=0...31 */
+#define I40E_GLHMC_VFSDCMD_MAX_INDEX 31
+#define I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT 0
+#define I40E_GLHMC_VFSDCMD_PMSDIDX_MASK (0xFFF \
+ << I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PF_SHIFT 16
+#define I40E_GLHMC_VFSDCMD_PF_MASK (0xF << I40E_GLHMC_VFSDCMD_PF_SHIFT)
+#define I40E_GLHMC_VFSDCMD_VF_SHIFT 20
+#define I40E_GLHMC_VFSDCMD_VF_MASK (0x1FF << I40E_GLHMC_VFSDCMD_VF_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT 29
+#define I40E_GLHMC_VFSDCMD_PMF_TYPE_MASK (0x3 \
+ << I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT 31
+#define I40E_GLHMC_VFSDCMD_PMSDWR_MASK (0x1 << I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT)
+
+#define I40E_GLHMC_VFSDDATAHIGH(_i) (0x000C8200 + ((_i) * 4)) \
+ /* _i=0...31 */
+#define I40E_GLHMC_VFSDDATAHIGH_MAX_INDEX 31
+#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT 0
+#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_MASK (0xFFFFFFFF \
+ << I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT)
+
+#define I40E_GLHMC_VFSDDATALOW(_i) (0x000C8100 + ((_i) * 4)) \
+ /* _i=0...31 */
+#define I40E_GLHMC_VFSDDATALOW_MAX_INDEX 31
+#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT 0
+#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_MASK (0x1 \
+ << I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT 1
+#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_MASK (0x1 \
+ << I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_MASK (0x3FF \
+ << I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT 12
+#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_MASK (0xFFFFF \
+ << I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT)
+
+#define I40E_GLPE_FWLDSTATUS 0x0000D200
+#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT 0
+#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_MASK (0x1 \
+ << I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_DONE_SHIFT 1
+#define I40E_GLPE_FWLDSTATUS_DONE_MASK (0x1 << I40E_GLPE_FWLDSTATUS_DONE_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT 2
+#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_MASK (0x1 \
+ << I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT 3
+#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_MASK (0x1 \
+ << I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT 4
+#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_MASK (0x1 \
+ << I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT)
+
+struct i40iw_sc_dev;
+struct i40iw_sc_qp;
+struct i40iw_puda_buf;
+struct i40iw_puda_completion_info;
+struct i40iw_update_sds_info;
+struct i40iw_hmc_fcn_info;
+struct i40iw_virtchnl_work_info;
+struct i40iw_manage_vf_pble_info;
+struct i40iw_device;
+struct i40iw_hmc_info;
+struct i40iw_hw;
+
+u8 __iomem *i40iw_get_hw_addr(void *dev);
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
+ u32 length, u32 value);
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum);
+void i40iw_free_hash_desc(struct shash_desc *);
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **);
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+ struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_update_sds_info *info);
+enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_fcn_info *hmcfcninfo);
+enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_dma_mem *values_mem,
+ u8 hmc_fn_id);
+enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_dma_mem *values_mem,
+ u8 hmc_fn_id);
+enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
+ struct i40iw_dma_mem *mem);
+enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev,
+ struct i40iw_manage_vf_pble_info *info);
+void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
+void *i40iw_remove_head(struct list_head *list);
+
+void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
+void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
+void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp);
+void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp);
+
+enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
+ struct i40iw_manage_vf_pble_info *info,
+ bool wait);
+struct i40iw_dev_pestat;
+void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *);
+void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *);
+#define i40iw_mmiowb() mmiowb()
+void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
+u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg);
+#endif /* _I40IW_OSDEP_H_ */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
new file mode 100644
index 000000000000..a0b8ca10d67e
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h
@@ -0,0 +1,106 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_P_H
+#define I40IW_P_H
+
+#define PAUSE_TIMER_VALUE 0xFFFF
+#define REFRESH_THRESHOLD 0x7FFF
+#define HIGH_THRESHOLD 0x800
+#define LOW_THRESHOLD 0x200
+#define ALL_TC2PFC 0xFF
+
+void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
+ char *desc, u64 *buf, u32 size);
+/* init operations */
+enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
+ struct i40iw_device_init_info *info);
+
+enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *);
+
+void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
+
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp,
+ struct i40iw_fast_reg_stag_info *info,
+ bool post_sq);
+
+/* HMC/FPM functions */
+enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev,
+ u8 hmc_fn_id);
+
+enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
+ u32 *vf_cnt_array);
+
+/* cqp misc functions */
+
+void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
+
+void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
+
+void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
+
+enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
+ struct i40iw_sc_qp *qp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
+ struct i40iw_sc_qp *qp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp *cqp,
+ u64 scratch, u8 hmc_fn_id,
+ bool post_sq,
+ bool poll_registers);
+
+enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count);
+
+void free_sd_mem(struct i40iw_sc_dev *dev);
+
+enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
+ struct cqp_commands_info *pcmdinfo);
+
+enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev);
+
+/* prototype for functions used for dynamic memory allocation */
+enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
+ struct i40iw_dma_mem *mem, u64 size,
+ u32 alignment);
+void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem);
+enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
+ struct i40iw_virt_mem *mem, u32 size);
+enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+ struct i40iw_virt_mem *mem);
+u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq);
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
new file mode 100644
index 000000000000..ded853d2fad8
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -0,0 +1,618 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_status.h"
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+
+#include <linux/pci.h>
+#include <linux/genalloc.h>
+#include <linux/vmalloc.h>
+#include "i40iw_pble.h"
+#include "i40iw.h"
+
+struct i40iw_device;
+static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc);
+static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk);
+
+/**
+ * i40iw_destroy_pble_pool - destroy pool during module unload
+ * @pble_rsrc: pble resources
+ */
+void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+ struct list_head *clist;
+ struct list_head *tlist;
+ struct i40iw_chunk *chunk;
+ struct i40iw_pble_pool *pinfo = &pble_rsrc->pinfo;
+
+ if (pinfo->pool) {
+ list_for_each_safe(clist, tlist, &pinfo->clist) {
+ chunk = list_entry(clist, struct i40iw_chunk, list);
+ if (chunk->type == I40IW_VMALLOC)
+ i40iw_free_vmalloc_mem(dev->hw, chunk);
+ kfree(chunk);
+ }
+ gen_pool_destroy(pinfo->pool);
+ }
+}
+
+/**
+ * i40iw_hmc_init_pble - Initialize pble resources during module load
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc: pble resources
+ */
+enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+ struct i40iw_hmc_info *hmc_info;
+ u32 fpm_idx = 0;
+
+ hmc_info = dev->hmc_info;
+ pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].base;
+ /* Now start the pble' on 4k boundary */
+ if (pble_rsrc->fpm_base_addr & 0xfff)
+ fpm_idx = (PAGE_SIZE - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
+
+ pble_rsrc->unallocated_pble =
+ hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt - fpm_idx;
+ pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
+
+ pble_rsrc->pinfo.pool_shift = POOL_SHIFT;
+ pble_rsrc->pinfo.pool = gen_pool_create(pble_rsrc->pinfo.pool_shift, -1);
+ INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
+ if (!pble_rsrc->pinfo.pool)
+ goto error;
+
+ if (add_pble_pool(dev, pble_rsrc))
+ goto error;
+
+ return 0;
+
+ error:i40iw_destroy_pble_pool(dev, pble_rsrc);
+ return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
+ * @ pble_rsrc: structure containing fpm address
+ * @ idx: where to return indexes
+ */
+static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct sd_pd_idx *idx)
+{
+ idx->sd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_DIRECT_BP_SIZE;
+ idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_PAGED_BP_SIZE;
+ idx->rel_pd_idx = (idx->pd_idx % I40IW_HMC_PD_CNT_IN_SD);
+}
+
+/**
+ * add_sd_direct - add sd direct for pble
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource ptr
+ * @info: page info for sd
+ */
+static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_add_page_info *info)
+{
+ enum i40iw_status_code ret_code = 0;
+ struct sd_pd_idx *idx = &info->idx;
+ struct i40iw_chunk *chunk = info->chunk;
+ struct i40iw_hmc_info *hmc_info = info->hmc_info;
+ struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
+ u32 offset = 0;
+
+ if (!sd_entry->valid) {
+ if (dev->is_pf) {
+ ret_code = i40iw_add_sd_table_entry(dev->hw, hmc_info,
+ info->idx.sd_idx,
+ I40IW_SD_TYPE_DIRECT,
+ I40IW_HMC_DIRECT_BP_SIZE);
+ if (ret_code)
+ return ret_code;
+ chunk->type = I40IW_DMA_COHERENT;
+ }
+ }
+ offset = idx->rel_pd_idx << I40IW_HMC_PAGED_BP_SHIFT;
+ chunk->size = info->pages << I40IW_HMC_PAGED_BP_SHIFT;
+ chunk->vaddr = ((u8 *)sd_entry->u.bp.addr.va + offset);
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ i40iw_debug(dev, I40IW_DEBUG_PBLE, "chunk_size[%d] = 0x%x vaddr=%p fpm_addr = %llx\n",
+ chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
+ return 0;
+}
+
+/**
+ * i40iw_free_vmalloc_mem - free vmalloc during close
+ * @hw: hw struct
+ * @chunk: chunk information for vmalloc
+ */
+static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
+{
+ struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ int i;
+
+ if (!chunk->pg_cnt)
+ goto done;
+ for (i = 0; i < chunk->pg_cnt; i++)
+ dma_unmap_page(&pcidev->dev, chunk->dmaaddrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ done:
+ kfree(chunk->dmaaddrs);
+ chunk->dmaaddrs = NULL;
+ vfree(chunk->vaddr);
+ chunk->vaddr = NULL;
+ chunk->type = 0;
+}
+
+/**
+ * i40iw_get_vmalloc_mem - get 2M page for sd
+ * @hw: hardware address
+ * @chunk: chunk to adf
+ * @pg_cnt: #of 4 K pages
+ */
+static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
+ struct i40iw_chunk *chunk,
+ int pg_cnt)
+{
+ struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct page *page;
+ u8 *addr;
+ u32 size;
+ int i;
+
+ chunk->dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
+ if (!chunk->dmaaddrs)
+ return I40IW_ERR_NO_MEMORY;
+ size = PAGE_SIZE * pg_cnt;
+ chunk->vaddr = vmalloc(size);
+ if (!chunk->vaddr) {
+ kfree(chunk->dmaaddrs);
+ chunk->dmaaddrs = NULL;
+ return I40IW_ERR_NO_MEMORY;
+ }
+ chunk->size = size;
+ addr = (u8 *)chunk->vaddr;
+ for (i = 0; i < pg_cnt; i++) {
+ page = vmalloc_to_page((void *)addr);
+ if (!page)
+ break;
+ chunk->dmaaddrs[i] = dma_map_page(&pcidev->dev, page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&pcidev->dev, chunk->dmaaddrs[i]))
+ break;
+ addr += PAGE_SIZE;
+ }
+
+ chunk->pg_cnt = i;
+ chunk->type = I40IW_VMALLOC;
+ if (i == pg_cnt)
+ return 0;
+
+ i40iw_free_vmalloc_mem(hw, chunk);
+ return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * fpm_to_idx - given fpm address, get pble index
+ * @pble_rsrc: pble resource management
+ * @addr: fpm address for index
+ */
+static inline u32 fpm_to_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, u64 addr)
+{
+ return (addr - (pble_rsrc->fpm_base_addr)) >> 3;
+}
+
+/**
+ * add_bp_pages - add backing pages for sd
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ * @info: page info for sd
+ */
+static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_add_page_info *info)
+{
+ u8 *addr;
+ struct i40iw_dma_mem mem;
+ struct i40iw_hmc_pd_entry *pd_entry;
+ struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
+ struct i40iw_hmc_info *hmc_info = info->hmc_info;
+ struct i40iw_chunk *chunk = info->chunk;
+ struct i40iw_manage_vf_pble_info vf_pble_info;
+ enum i40iw_status_code status = 0;
+ u32 rel_pd_idx = info->idx.rel_pd_idx;
+ u32 pd_idx = info->idx.pd_idx;
+ u32 i;
+
+ status = i40iw_get_vmalloc_mem(dev->hw, chunk, info->pages);
+ if (status)
+ return I40IW_ERR_NO_MEMORY;
+ status = i40iw_add_sd_table_entry(dev->hw, hmc_info,
+ info->idx.sd_idx, I40IW_SD_TYPE_PAGED,
+ I40IW_HMC_DIRECT_BP_SIZE);
+ if (status) {
+ i40iw_free_vmalloc_mem(dev->hw, chunk);
+ return status;
+ }
+ if (!dev->is_pf) {
+ status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE,
+ fpm_to_idx(pble_rsrc,
+ pble_rsrc->next_fpm_addr),
+ (info->pages << PBLE_512_SHIFT));
+ if (status) {
+ i40iw_pr_err("allocate PBLEs in the PF. Error %i\n", status);
+ i40iw_free_vmalloc_mem(dev->hw, chunk);
+ return status;
+ }
+ }
+ addr = chunk->vaddr;
+ for (i = 0; i < info->pages; i++) {
+ mem.pa = chunk->dmaaddrs[i];
+ mem.size = PAGE_SIZE;
+ mem.va = (void *)(addr);
+ pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
+ if (!pd_entry->valid) {
+ status = i40iw_add_pd_table_entry(dev->hw, hmc_info, pd_idx++, &mem);
+ if (status)
+ goto error;
+ addr += PAGE_SIZE;
+ } else {
+ i40iw_pr_err("pd entry is valid expecting to be invalid\n");
+ }
+ }
+ if (!dev->is_pf) {
+ vf_pble_info.first_pd_index = info->idx.rel_pd_idx;
+ vf_pble_info.inv_pd_ent = false;
+ vf_pble_info.pd_entry_cnt = PBLE_PER_PAGE;
+ vf_pble_info.pd_pl_pba = sd_entry->u.pd_table.pd_page_addr.pa;
+ vf_pble_info.sd_index = info->idx.sd_idx;
+ status = i40iw_hw_manage_vf_pble_bp(dev->back_dev,
+ &vf_pble_info, true);
+ if (status) {
+ i40iw_pr_err("CQP manage VF PBLE BP failed. %i\n", status);
+ goto error;
+ }
+ }
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ return 0;
+error:
+ i40iw_free_vmalloc_mem(dev->hw, chunk);
+ return status;
+}
+
+/**
+ * add_pble_pool - add a sd entry for pble resoure
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ */
+static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+ struct i40iw_hmc_sd_entry *sd_entry;
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_chunk *chunk;
+ struct i40iw_add_page_info info;
+ struct sd_pd_idx *idx = &info.idx;
+ enum i40iw_status_code ret_code = 0;
+ enum i40iw_sd_entry_type sd_entry_type;
+ u64 sd_reg_val = 0;
+ u32 pages;
+
+ if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
+ return I40IW_ERR_NO_MEMORY;
+ if (pble_rsrc->next_fpm_addr & 0xfff) {
+ i40iw_pr_err("next fpm_addr %llx\n", pble_rsrc->next_fpm_addr);
+ return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+ }
+ chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+ if (!chunk)
+ return I40IW_ERR_NO_MEMORY;
+ hmc_info = dev->hmc_info;
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ get_sd_pd_idx(pble_rsrc, idx);
+ sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
+ pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
+ idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
+ pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
+ if (!pages) {
+ ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE;
+ goto error;
+ }
+ info.chunk = chunk;
+ info.hmc_info = hmc_info;
+ info.pages = pages;
+ info.sd_entry = sd_entry;
+ if (!sd_entry->valid) {
+ sd_entry_type = (!idx->rel_pd_idx &&
+ (pages == I40IW_HMC_PD_CNT_IN_SD) &&
+ dev->is_pf) ? I40IW_SD_TYPE_DIRECT : I40IW_SD_TYPE_PAGED;
+ } else {
+ sd_entry_type = sd_entry->entry_type;
+ }
+ i40iw_debug(dev, I40IW_DEBUG_PBLE,
+ "pages = %d, unallocated_pble[%u] current_fpm_addr = %llx\n",
+ pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr);
+ i40iw_debug(dev, I40IW_DEBUG_PBLE, "sd_entry_type = %d sd_entry valid = %d\n",
+ sd_entry_type, sd_entry->valid);
+
+ if (sd_entry_type == I40IW_SD_TYPE_DIRECT)
+ ret_code = add_sd_direct(dev, pble_rsrc, &info);
+ if (ret_code)
+ sd_entry_type = I40IW_SD_TYPE_PAGED;
+ else
+ pble_rsrc->stats_direct_sds++;
+
+ if (sd_entry_type == I40IW_SD_TYPE_PAGED) {
+ ret_code = add_bp_pages(dev, pble_rsrc, &info);
+ if (ret_code)
+ goto error;
+ else
+ pble_rsrc->stats_paged_sds++;
+ }
+
+ if (gen_pool_add_virt(pble_rsrc->pinfo.pool, (unsigned long)chunk->vaddr,
+ (phys_addr_t)chunk->fpm_addr, chunk->size, -1)) {
+ i40iw_pr_err("could not allocate memory by gen_pool_addr_virt()\n");
+ ret_code = I40IW_ERR_NO_MEMORY;
+ goto error;
+ }
+ pble_rsrc->next_fpm_addr += chunk->size;
+ i40iw_debug(dev, I40IW_DEBUG_PBLE, "next_fpm_addr = %llx chunk_size[%u] = 0x%x\n",
+ pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+ pble_rsrc->unallocated_pble -= (chunk->size >> 3);
+ list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+ sd_reg_val = (sd_entry_type == I40IW_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
+ if (sd_entry->valid)
+ return 0;
+ if (dev->is_pf)
+ ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+ sd_reg_val, idx->sd_idx,
+ sd_entry->entry_type, true);
+ if (ret_code) {
+ i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
+ goto error;
+ }
+
+ sd_entry->valid = true;
+ return 0;
+ error:
+ kfree(chunk);
+ return ret_code;
+}
+
+/**
+ * free_lvl2 - fee level 2 pble
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static void free_lvl2(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc)
+{
+ u32 i;
+ struct gen_pool *pool;
+ struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+ struct i40iw_pble_info *root = &lvl2->root;
+ struct i40iw_pble_info *leaf = lvl2->leaf;
+
+ pool = pble_rsrc->pinfo.pool;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ if (leaf->addr)
+ gen_pool_free(pool, leaf->addr, (leaf->cnt << 3));
+ else
+ break;
+ }
+
+ if (root->addr)
+ gen_pool_free(pool, root->addr, (root->cnt << 3));
+
+ kfree(lvl2->leaf);
+ lvl2->leaf = NULL;
+}
+
+/**
+ * get_lvl2_pble - get level 2 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ * @pool: pool pointer
+ */
+static enum i40iw_status_code get_lvl2_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc,
+ struct gen_pool *pool)
+{
+ u32 lf4k, lflast, total, i;
+ u32 pblcnt = PBLE_PER_PAGE;
+ u64 *addr;
+ struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+ struct i40iw_pble_info *root = &lvl2->root;
+ struct i40iw_pble_info *leaf;
+
+ /* number of full 512 (4K) leafs) */
+ lf4k = palloc->total_cnt >> 9;
+ lflast = palloc->total_cnt % PBLE_PER_PAGE;
+ total = (lflast == 0) ? lf4k : lf4k + 1;
+ lvl2->leaf_cnt = total;
+
+ leaf = kzalloc((sizeof(*leaf) * total), GFP_ATOMIC);
+ if (!leaf)
+ return I40IW_ERR_NO_MEMORY;
+ lvl2->leaf = leaf;
+ /* allocate pbles for the root */
+ root->addr = gen_pool_alloc(pool, (total << 3));
+ if (!root->addr) {
+ kfree(lvl2->leaf);
+ lvl2->leaf = NULL;
+ return I40IW_ERR_NO_MEMORY;
+ }
+ root->idx = fpm_to_idx(pble_rsrc,
+ (u64)gen_pool_virt_to_phys(pool, root->addr));
+ root->cnt = total;
+ addr = (u64 *)root->addr;
+ for (i = 0; i < total; i++, leaf++) {
+ pblcnt = (lflast && ((i + 1) == total)) ? lflast : PBLE_PER_PAGE;
+ leaf->addr = gen_pool_alloc(pool, (pblcnt << 3));
+ if (!leaf->addr)
+ goto error;
+ leaf->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool, leaf->addr));
+
+ leaf->cnt = pblcnt;
+ *addr = (u64)leaf->idx;
+ addr++;
+ }
+ palloc->level = I40IW_LEVEL_2;
+ pble_rsrc->stats_lvl2++;
+ return 0;
+ error:
+ free_lvl2(pble_rsrc, palloc);
+ return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * get_lvl1_pble - get level 1 pble resource
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ * @palloc: level 1 pble allocation
+ */
+static enum i40iw_status_code get_lvl1_pble(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc)
+{
+ u64 *addr;
+ struct gen_pool *pool;
+ struct i40iw_pble_info *lvl1 = &palloc->level1;
+
+ pool = pble_rsrc->pinfo.pool;
+ addr = (u64 *)gen_pool_alloc(pool, (palloc->total_cnt << 3));
+
+ if (!addr)
+ return I40IW_ERR_NO_MEMORY;
+
+ palloc->level = I40IW_LEVEL_1;
+ lvl1->addr = (unsigned long)addr;
+ lvl1->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool,
+ (unsigned long)addr));
+ lvl1->cnt = palloc->total_cnt;
+ pble_rsrc->stats_lvl1++;
+ return 0;
+}
+
+/**
+ * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc: pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pool: pointer to general purpose special memory pool descriptor
+ */
+static inline enum i40iw_status_code get_lvl1_lvl2_pble(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc,
+ struct gen_pool *pool)
+{
+ enum i40iw_status_code status = 0;
+
+ status = get_lvl1_pble(dev, pble_rsrc, palloc);
+ if (status && (palloc->total_cnt > PBLE_PER_PAGE))
+ status = get_lvl2_pble(pble_rsrc, palloc, pool);
+ return status;
+}
+
+/**
+ * i40iw_get_pble - allocate pbles from the pool
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc: pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pble_cnt: #of pbles requested
+ */
+enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc,
+ u32 pble_cnt)
+{
+ struct gen_pool *pool;
+ enum i40iw_status_code status = 0;
+ u32 max_sds = 0;
+ int i;
+
+ pool = pble_rsrc->pinfo.pool;
+ palloc->total_cnt = pble_cnt;
+ palloc->level = I40IW_LEVEL_0;
+ /*check first to see if we can get pble's without acquiring additional sd's */
+ status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
+ if (!status)
+ goto exit;
+ max_sds = (palloc->total_cnt >> 18) + 1;
+ for (i = 0; i < max_sds; i++) {
+ status = add_pble_pool(dev, pble_rsrc);
+ if (status)
+ break;
+ status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
+ if (!status)
+ break;
+ }
+exit:
+ if (!status)
+ pble_rsrc->stats_alloc_ok++;
+ else
+ pble_rsrc->stats_alloc_fail++;
+
+ return status;
+}
+
+/**
+ * i40iw_free_pble - put pbles back into pool
+ * @pble_rsrc: pble resources
+ * @palloc: contains all inforamtion regarding pble resource being freed
+ */
+void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc)
+{
+ struct gen_pool *pool;
+
+ pool = pble_rsrc->pinfo.pool;
+ if (palloc->level == I40IW_LEVEL_2)
+ free_lvl2(pble_rsrc, palloc);
+ else
+ gen_pool_free(pool, palloc->level1.addr,
+ (palloc->level1.cnt << 3));
+ pble_rsrc->stats_alloc_freed++;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.h b/drivers/infiniband/hw/i40iw/i40iw_pble.h
new file mode 100644
index 000000000000..7b1851d21cc0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.h
@@ -0,0 +1,131 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_PBLE_H
+#define I40IW_PBLE_H
+
+#define POOL_SHIFT 6
+#define PBLE_PER_PAGE 512
+#define I40IW_HMC_PAGED_BP_SHIFT 12
+#define PBLE_512_SHIFT 9
+
+enum i40iw_pble_level {
+ I40IW_LEVEL_0 = 0,
+ I40IW_LEVEL_1 = 1,
+ I40IW_LEVEL_2 = 2
+};
+
+enum i40iw_alloc_type {
+ I40IW_NO_ALLOC = 0,
+ I40IW_DMA_COHERENT = 1,
+ I40IW_VMALLOC = 2
+};
+
+struct i40iw_pble_info {
+ unsigned long addr;
+ u32 idx;
+ u32 cnt;
+};
+
+struct i40iw_pble_level2 {
+ struct i40iw_pble_info root;
+ struct i40iw_pble_info *leaf;
+ u32 leaf_cnt;
+};
+
+struct i40iw_pble_alloc {
+ u32 total_cnt;
+ enum i40iw_pble_level level;
+ union {
+ struct i40iw_pble_info level1;
+ struct i40iw_pble_level2 level2;
+ };
+};
+
+struct sd_pd_idx {
+ u32 sd_idx;
+ u32 pd_idx;
+ u32 rel_pd_idx;
+};
+
+struct i40iw_add_page_info {
+ struct i40iw_chunk *chunk;
+ struct i40iw_hmc_sd_entry *sd_entry;
+ struct i40iw_hmc_info *hmc_info;
+ struct sd_pd_idx idx;
+ u32 pages;
+};
+
+struct i40iw_chunk {
+ struct list_head list;
+ u32 size;
+ void *vaddr;
+ u64 fpm_addr;
+ u32 pg_cnt;
+ dma_addr_t *dmaaddrs;
+ enum i40iw_alloc_type type;
+};
+
+struct i40iw_pble_pool {
+ struct gen_pool *pool;
+ struct list_head clist;
+ u32 total_pble_alloc;
+ u32 free_pble_cnt;
+ u32 pool_shift;
+};
+
+struct i40iw_hmc_pble_rsrc {
+ u32 unallocated_pble;
+ u64 fpm_base_addr;
+ u64 next_fpm_addr;
+ struct i40iw_pble_pool pinfo;
+
+ u32 stats_direct_sds;
+ u32 stats_paged_sds;
+ u64 stats_alloc_ok;
+ u64 stats_alloc_fail;
+ u64 stats_alloc_freed;
+ u64 stats_lvl1;
+ u64 stats_lvl2;
+};
+
+void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc);
+enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc);
+void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct i40iw_pble_alloc *palloc);
+enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_pble_rsrc *pble_rsrc,
+ struct i40iw_pble_alloc *palloc,
+ u32 pble_cnt);
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
new file mode 100644
index 000000000000..8eb400d8a7a0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -0,0 +1,1436 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_puda.h"
+
+static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_buf *buf);
+static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid);
+static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
+static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
+ *rsrc, bool initial);
+/**
+ * i40iw_puda_get_listbuf - get buffer from puda list
+ * @list: list to use for buffers (ILQ or IEQ)
+ */
+static struct i40iw_puda_buf *i40iw_puda_get_listbuf(struct list_head *list)
+{
+ struct i40iw_puda_buf *buf = NULL;
+
+ if (!list_empty(list)) {
+ buf = (struct i40iw_puda_buf *)list->next;
+ list_del((struct list_head *)&buf->list);
+ }
+ return buf;
+}
+
+/**
+ * i40iw_puda_get_bufpool - return buffer from resource
+ * @rsrc: resource to use for buffer
+ */
+struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc)
+{
+ struct i40iw_puda_buf *buf = NULL;
+ struct list_head *list = &rsrc->bufpool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ buf = i40iw_puda_get_listbuf(list);
+ if (buf)
+ rsrc->avail_buf_count--;
+ else
+ rsrc->stats_buf_alloc_fail++;
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ return buf;
+}
+
+/**
+ * i40iw_puda_ret_bufpool - return buffer to rsrc list
+ * @rsrc: resource to use for buffer
+ * @buf: buffe to return to resouce
+ */
+void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
+ struct i40iw_puda_buf *buf)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ list_add(&buf->list, &rsrc->bufpool);
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ rsrc->avail_buf_count++;
+}
+
+/**
+ * i40iw_puda_post_recvbuf - set wqe for rcv buffer
+ * @rsrc: resource ptr
+ * @wqe_idx: wqe index to use
+ * @buf: puda buffer for rcv q
+ * @initial: flag if during init time
+ */
+static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx,
+ struct i40iw_puda_buf *buf, bool initial)
+{
+ u64 *wqe;
+ struct i40iw_sc_qp *qp = &rsrc->qp;
+ u64 offset24 = 0;
+
+ qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
+ wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s: wqe_idx= %d buf = %p wqe = %p\n", __func__,
+ wqe_idx, buf, wqe);
+ if (!initial)
+ get_64bit_val(wqe, 24, &offset24);
+
+ offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
+ set_64bit_val(wqe, 24, offset24);
+
+ set_64bit_val(wqe, 0, buf->mem.pa);
+ set_64bit_val(wqe, 8,
+ LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN));
+ set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * i40iw_puda_replenish_rq - post rcv buffers
+ * @rsrc: resource to use for buffer
+ * @initial: flag if during init time
+ */
+static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc,
+ bool initial)
+{
+ u32 i;
+ u32 invalid_cnt = rsrc->rxq_invalid_cnt;
+ struct i40iw_puda_buf *buf = NULL;
+
+ for (i = 0; i < invalid_cnt; i++) {
+ buf = i40iw_puda_get_bufpool(rsrc);
+ if (!buf)
+ return I40IW_ERR_list_empty;
+ i40iw_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf,
+ initial);
+ rsrc->rx_wqe_idx =
+ ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
+ rsrc->rxq_invalid_cnt--;
+ }
+ return 0;
+}
+
+/**
+ * i40iw_puda_alloc_buf - allocate mem for buffer
+ * @dev: iwarp device
+ * @length: length of buffer
+ */
+static struct i40iw_puda_buf *i40iw_puda_alloc_buf(struct i40iw_sc_dev *dev,
+ u32 length)
+{
+ struct i40iw_puda_buf *buf = NULL;
+ struct i40iw_virt_mem buf_mem;
+ enum i40iw_status_code ret;
+
+ ret = i40iw_allocate_virt_mem(dev->hw, &buf_mem,
+ sizeof(struct i40iw_puda_buf));
+ if (ret) {
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s: error mem for buf\n", __func__);
+ return NULL;
+ }
+ buf = (struct i40iw_puda_buf *)buf_mem.va;
+ ret = i40iw_allocate_dma_mem(dev->hw, &buf->mem, length, 1);
+ if (ret) {
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s: error dma mem for buf\n", __func__);
+ i40iw_free_virt_mem(dev->hw, &buf_mem);
+ return NULL;
+ }
+ buf->buf_mem.va = buf_mem.va;
+ buf->buf_mem.size = buf_mem.size;
+ return buf;
+}
+
+/**
+ * i40iw_puda_dele_buf - delete buffer back to system
+ * @dev: iwarp device
+ * @buf: buffer to free
+ */
+static void i40iw_puda_dele_buf(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_buf *buf)
+{
+ i40iw_free_dma_mem(dev->hw, &buf->mem);
+ i40iw_free_virt_mem(dev->hw, &buf->buf_mem);
+}
+
+/**
+ * i40iw_puda_get_next_send_wqe - return next wqe for processing
+ * @qp: puda qp for wqe
+ * @wqe_idx: wqe index for caller
+ */
+static u64 *i40iw_puda_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
+{
+ u64 *wqe = NULL;
+ enum i40iw_status_code ret_code = 0;
+
+ *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return wqe;
+ wqe = qp->sq_base[*wqe_idx].elem;
+
+ return wqe;
+}
+
+/**
+ * i40iw_puda_poll_info - poll cq for completion
+ * @cq: cq for poll
+ * @info: info return for successful completion
+ */
+static enum i40iw_status_code i40iw_puda_poll_info(struct i40iw_sc_cq *cq,
+ struct i40iw_puda_completion_info *info)
+{
+ u64 qword0, qword2, qword3;
+ u64 *cqe;
+ u64 comp_ctx;
+ bool valid_bit;
+ u32 major_err, minor_err;
+ bool error;
+
+ cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&cq->cq_uk);
+ get_64bit_val(cqe, 24, &qword3);
+ valid_bit = (bool)RS_64(qword3, I40IW_CQ_VALID);
+
+ if (valid_bit != cq->cq_uk.polarity)
+ return I40IW_ERR_QUEUE_EMPTY;
+
+ i40iw_debug_buf(cq->dev, I40IW_DEBUG_PUDA, "PUDA CQE", cqe, 32);
+ error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
+ if (error) {
+ i40iw_debug(cq->dev, I40IW_DEBUG_PUDA, "%s receive error\n", __func__);
+ major_err = (u32)(RS_64(qword3, I40IW_CQ_MAJERR));
+ minor_err = (u32)(RS_64(qword3, I40IW_CQ_MINERR));
+ info->compl_error = major_err << 16 | minor_err;
+ return I40IW_ERR_CQ_COMPL_ERROR;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
+ info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ info->qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+ info->wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
+
+ if (info->q_type == I40IW_CQE_QTYPE_RQ) {
+ info->vlan_valid = (bool)RS_64(qword3, I40IW_VLAN_TAG_VALID);
+ info->l4proto = (u8)RS_64(qword2, I40IW_UDA_L4PROTO);
+ info->l3proto = (u8)RS_64(qword2, I40IW_UDA_L3PROTO);
+ info->payload_len = (u16)RS_64(qword0, I40IW_UDA_PAYLOADLEN);
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_puda_poll_completion - processes completion for cq
+ * @dev: iwarp device
+ * @cq: cq getting interrupt
+ * @compl_err: return any completion err
+ */
+enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_cq *cq, u32 *compl_err)
+{
+ struct i40iw_qp_uk *qp;
+ struct i40iw_cq_uk *cq_uk = &cq->cq_uk;
+ struct i40iw_puda_completion_info info;
+ enum i40iw_status_code ret = 0;
+ struct i40iw_puda_buf *buf;
+ struct i40iw_puda_rsrc *rsrc;
+ void *sqwrid;
+ u8 cq_type = cq->cq_type;
+ unsigned long flags;
+
+ if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
+ rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
+ return I40IW_ERR_BAD_PTR;
+ }
+ memset(&info, 0, sizeof(info));
+ ret = i40iw_puda_poll_info(cq, &info);
+ *compl_err = info.compl_error;
+ if (ret == I40IW_ERR_QUEUE_EMPTY)
+ return ret;
+ if (ret)
+ goto done;
+
+ qp = info.qp;
+ if (!qp || !rsrc) {
+ ret = I40IW_ERR_BAD_PTR;
+ goto done;
+ }
+
+ if (qp->qp_id != rsrc->qp_id) {
+ ret = I40IW_ERR_BAD_PTR;
+ goto done;
+ }
+
+ if (info.q_type == I40IW_CQE_QTYPE_RQ) {
+ buf = (struct i40iw_puda_buf *)(uintptr_t)qp->rq_wrid_array[info.wqe_idx];
+ /* Get all the tcpip information in the buf header */
+ ret = i40iw_puda_get_tcpip_info(&info, buf);
+ if (ret) {
+ rsrc->stats_rcvd_pkt_err++;
+ if (cq_type == I40IW_CQ_TYPE_ILQ) {
+ i40iw_ilq_putback_rcvbuf(&rsrc->qp,
+ info.wqe_idx);
+ } else {
+ i40iw_puda_ret_bufpool(rsrc, buf);
+ i40iw_puda_replenish_rq(rsrc, false);
+ }
+ goto done;
+ }
+
+ rsrc->stats_pkt_rcvd++;
+ rsrc->compl_rxwqe_idx = info.wqe_idx;
+ i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
+ rsrc->receive(rsrc->dev, buf);
+ if (cq_type == I40IW_CQ_TYPE_ILQ)
+ i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
+ else
+ i40iw_puda_replenish_rq(rsrc, false);
+
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
+ sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
+ I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
+ rsrc->xmit_complete(rsrc->dev, sqwrid);
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ rsrc->tx_wqe_avail_cnt++;
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ if (!list_empty(&dev->ilq->txpend))
+ i40iw_puda_send_buf(dev->ilq, NULL);
+ }
+
+done:
+ I40IW_RING_MOVE_HEAD(cq_uk->cq_ring, ret);
+ if (I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring) == 0)
+ cq_uk->polarity = !cq_uk->polarity;
+ /* update cq tail in cq shadow memory also */
+ I40IW_RING_MOVE_TAIL(cq_uk->cq_ring);
+ set_64bit_val(cq_uk->shadow_area, 0,
+ I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring));
+ return 0;
+}
+
+/**
+ * i40iw_puda_send - complete send wqe for transmit
+ * @qp: puda qp for send
+ * @info: buffer information for transmit
+ */
+enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
+ struct i40iw_puda_send_info *info)
+{
+ u64 *wqe;
+ u32 iplen, l4len;
+ u64 header[2];
+ u32 wqe_idx;
+ u8 iipt;
+
+ /* number of 32 bits DWORDS in header */
+ l4len = info->tcplen >> 2;
+ if (info->ipv4) {
+ iipt = 3;
+ iplen = 5;
+ } else {
+ iipt = 1;
+ iplen = 10;
+ }
+
+ wqe = i40iw_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+ qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
+ /* Third line of WQE descriptor */
+ /* maclen is in words */
+ header[0] = LS_64((info->maclen >> 1), I40IW_UDA_QPSQ_MACLEN) |
+ LS_64(iplen, I40IW_UDA_QPSQ_IPLEN) | LS_64(1, I40IW_UDA_QPSQ_L4T) |
+ LS_64(iipt, I40IW_UDA_QPSQ_IIPT) |
+ LS_64(l4len, I40IW_UDA_QPSQ_L4LEN);
+ /* Forth line of WQE descriptor */
+ header[1] = LS_64(I40IW_OP_TYPE_SEND, I40IW_UDA_QPSQ_OPCODE) |
+ LS_64(1, I40IW_UDA_QPSQ_SIGCOMPL) |
+ LS_64(info->doloopback, I40IW_UDA_QPSQ_DOLOOPBACK) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IW_UDA_QPSQ_VALID);
+
+ set_64bit_val(wqe, 0, info->paddr);
+ set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
+ set_64bit_val(wqe, 16, header[0]);
+ set_64bit_val(wqe, 24, header[1]);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
+ i40iw_qp_post_wr(&qp->qp_uk);
+ return 0;
+}
+
+/**
+ * i40iw_puda_send_buf - transmit puda buffer
+ * @rsrc: resource to use for buffer
+ * @buf: puda buffer to transmit
+ */
+void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf)
+{
+ struct i40iw_puda_send_info info;
+ enum i40iw_status_code ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ /* if no wqe available or not from a completion and we have
+ * pending buffers, we must queue new buffer
+ */
+ if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
+ list_add_tail(&buf->list, &rsrc->txpend);
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ rsrc->stats_sent_pkt_q++;
+ if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s: adding to txpend\n", __func__);
+ return;
+ }
+ rsrc->tx_wqe_avail_cnt--;
+ /* if we are coming from a completion and have pending buffers
+ * then Get one from pending list
+ */
+ if (!buf) {
+ buf = i40iw_puda_get_listbuf(&rsrc->txpend);
+ if (!buf)
+ goto done;
+ }
+
+ info.scratch = (void *)buf;
+ info.paddr = buf->mem.pa;
+ info.len = buf->totallen;
+ info.tcplen = buf->tcphlen;
+ info.maclen = buf->maclen;
+ info.ipv4 = buf->ipv4;
+ info.doloopback = (rsrc->type == I40IW_PUDA_RSRC_TYPE_IEQ);
+
+ ret = i40iw_puda_send(&rsrc->qp, &info);
+ if (ret) {
+ rsrc->tx_wqe_avail_cnt++;
+ rsrc->stats_sent_pkt_q++;
+ list_add(&buf->list, &rsrc->txpend);
+ if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s: adding to puda_send\n", __func__);
+ } else {
+ rsrc->stats_pkt_sent++;
+ }
+done:
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+}
+
+/**
+ * i40iw_puda_qp_setctx - during init, set qp's context
+ * @rsrc: qp's resource
+ */
+static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
+{
+ struct i40iw_sc_qp *qp = &rsrc->qp;
+ u64 *qp_ctx = qp->hw_host_ctx;
+
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+ set_64bit_val(qp_ctx, 24,
+ LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
+ LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE));
+
+ set_64bit_val(qp_ctx, 48, LS_64(1514, I40IWQPC_SNDMSS));
+ set_64bit_val(qp_ctx, 56, 0);
+ set_64bit_val(qp_ctx, 64, 1);
+
+ set_64bit_val(qp_ctx, 136,
+ LS_64(rsrc->cq_id, I40IWQPC_TXCQNUM) |
+ LS_64(rsrc->cq_id, I40IWQPC_RXCQNUM));
+
+ set_64bit_val(qp_ctx, 160, LS_64(1, I40IWQPC_PRIVEN));
+
+ set_64bit_val(qp_ctx, 168,
+ LS_64((uintptr_t)qp, I40IWQPC_QPCOMPCTX));
+
+ set_64bit_val(qp_ctx, 176,
+ LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
+ LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
+ LS_64(qp->qs_handle, I40IWQPC_QSHANDLE));
+
+ i40iw_debug_buf(rsrc->dev, I40IW_DEBUG_PUDA, "PUDA QP CONTEXT",
+ qp_ctx, I40IW_QP_CTX_SIZE);
+}
+
+/**
+ * i40iw_puda_qp_wqe - setup wqe for qp create
+ * @rsrc: resource for qp
+ */
+static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc)
+{
+ struct i40iw_sc_qp *qp = &rsrc->qp;
+ struct i40iw_sc_dev *dev = rsrc->dev;
+ struct i40iw_sc_cqp *cqp;
+ u64 *wqe;
+ u64 header;
+ struct i40iw_ccq_cqe_info compl_info;
+ enum i40iw_status_code status = 0;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+ header = qp->qp_uk.qp_id |
+ LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
+ LS_64(I40IW_QP_TYPE_UDA, I40IW_CQPSQ_QP_QPTYPE) |
+ LS_64(1, I40IW_CQPSQ_QP_CQNUMVALID) |
+ LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ set_64bit_val(wqe, 24, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32);
+ i40iw_sc_cqp_post_sq(cqp);
+ status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_CREATE_QP,
+ &compl_info);
+ return status;
+}
+
+/**
+ * i40iw_puda_qp_create - create qp for resource
+ * @rsrc: resource to use for buffer
+ */
+static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
+{
+ struct i40iw_sc_qp *qp = &rsrc->qp;
+ struct i40iw_qp_uk *ukqp = &qp->qp_uk;
+ enum i40iw_status_code ret = 0;
+ u32 sq_size, rq_size, t_size;
+ struct i40iw_dma_mem *mem;
+
+ sq_size = rsrc->sq_size * I40IW_QP_WQE_MIN_SIZE;
+ rq_size = rsrc->rq_size * I40IW_QP_WQE_MIN_SIZE;
+ t_size = (sq_size + rq_size + (I40IW_SHADOW_AREA_SIZE << 3) +
+ I40IW_QP_CTX_SIZE);
+ /* Get page aligned memory */
+ ret =
+ i40iw_allocate_dma_mem(rsrc->dev->hw, &rsrc->qpmem, t_size,
+ I40IW_HW_PAGE_SIZE);
+ if (ret) {
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s: error dma mem\n", __func__);
+ return ret;
+ }
+
+ mem = &rsrc->qpmem;
+ memset(mem->va, 0, t_size);
+ qp->hw_sq_size = i40iw_get_encoded_wqe_size(rsrc->sq_size, false);
+ qp->hw_rq_size = i40iw_get_encoded_wqe_size(rsrc->rq_size, false);
+ qp->pd = &rsrc->sc_pd;
+ qp->qp_type = I40IW_QP_TYPE_UDA;
+ qp->dev = rsrc->dev;
+ qp->back_qp = (void *)rsrc;
+ qp->sq_pa = mem->pa;
+ qp->rq_pa = qp->sq_pa + sq_size;
+ ukqp->sq_base = mem->va;
+ ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
+ ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
+ qp->shadow_area_pa = qp->rq_pa + rq_size;
+ qp->hw_host_ctx = ukqp->shadow_area + I40IW_SHADOW_AREA_SIZE;
+ qp->hw_host_ctx_pa =
+ qp->shadow_area_pa + (I40IW_SHADOW_AREA_SIZE << 3);
+ ukqp->qp_id = rsrc->qp_id;
+ ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
+ ukqp->rq_wrid_array = rsrc->rq_wrid_array;
+
+ ukqp->qp_id = rsrc->qp_id;
+ ukqp->sq_size = rsrc->sq_size;
+ ukqp->rq_size = rsrc->rq_size;
+
+ I40IW_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
+ I40IW_RING_INIT(ukqp->initial_ring, ukqp->sq_size);
+ I40IW_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
+
+ if (qp->pd->dev->is_pf)
+ ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+ I40E_PFPE_WQEALLOC);
+ else
+ ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+ I40E_VFPE_WQEALLOC1);
+
+ qp->qs_handle = qp->dev->qs_handle;
+ i40iw_puda_qp_setctx(rsrc);
+ ret = i40iw_puda_qp_wqe(rsrc);
+ if (ret)
+ i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
+ return ret;
+}
+
+/**
+ * i40iw_puda_cq_create - create cq for resource
+ * @rsrc: resource for which cq to create
+ */
+static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
+{
+ struct i40iw_sc_dev *dev = rsrc->dev;
+ struct i40iw_sc_cq *cq = &rsrc->cq;
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ enum i40iw_status_code ret = 0;
+ u32 tsize, cqsize;
+ u32 shadow_read_threshold = 128;
+ struct i40iw_dma_mem *mem;
+ struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_cq_init_info info;
+ struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
+
+ cq->back_cq = (void *)rsrc;
+ cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
+ tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
+ ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
+ I40IW_CQ0_ALIGNMENT_MASK);
+ if (ret)
+ return ret;
+
+ mem = &rsrc->cqmem;
+ memset(&info, 0, sizeof(info));
+ info.dev = dev;
+ info.type = (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ) ?
+ I40IW_CQ_TYPE_ILQ : I40IW_CQ_TYPE_IEQ;
+ info.shadow_read_threshold = rsrc->cq_size >> 2;
+ info.ceq_id_valid = true;
+ info.cq_base_pa = mem->pa;
+ info.shadow_area_pa = mem->pa + cqsize;
+ init_info->cq_base = mem->va;
+ init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
+ init_info->cq_size = rsrc->cq_size;
+ init_info->cq_id = rsrc->cq_id;
+ ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
+ if (ret)
+ goto error;
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe) {
+ ret = I40IW_ERR_RING_FULL;
+ goto error;
+ }
+
+ set_64bit_val(wqe, 0, rsrc->cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+ set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+ set_64bit_val(wqe, 32, cq->cq_pa);
+
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+
+ header = rsrc->cq_id |
+ LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+ LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ set_64bit_val(wqe, 24, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_sc_cqp_post_sq(dev->cqp);
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_CREATE_CQ,
+ &compl_info);
+
+error:
+ if (ret)
+ i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+ return ret;
+}
+
+/**
+ * i40iw_puda_dele_resources - delete all resources during close
+ * @dev: iwarp device
+ * @type: type of resource to dele
+ * @reset: true if reset chip
+ */
+void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+ enum puda_resource_type type,
+ bool reset)
+{
+ struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_puda_rsrc *rsrc;
+ struct i40iw_puda_buf *buf = NULL;
+ struct i40iw_puda_buf *nextbuf = NULL;
+ struct i40iw_virt_mem *vmem;
+ enum i40iw_status_code ret;
+
+ switch (type) {
+ case I40IW_PUDA_RSRC_TYPE_ILQ:
+ rsrc = dev->ilq;
+ vmem = &dev->ilq_mem;
+ break;
+ case I40IW_PUDA_RSRC_TYPE_IEQ:
+ rsrc = dev->ieq;
+ vmem = &dev->ieq_mem;
+ break;
+ default:
+ i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
+ __func__, type);
+ return;
+ }
+
+ switch (rsrc->completion) {
+ case PUDA_HASH_CRC_COMPLETE:
+ i40iw_free_hash_desc(rsrc->hash_desc);
+ case PUDA_QP_CREATED:
+ do {
+ if (reset)
+ break;
+ ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
+ 0, false, true, true);
+ if (ret)
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s error ieq qp destroy\n",
+ __func__);
+
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_DESTROY_QP,
+ &compl_info);
+ if (ret)
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s error ieq qp destroy done\n",
+ __func__);
+ } while (0);
+
+ i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
+ /* fallthrough */
+ case PUDA_CQ_CREATED:
+ do {
+ if (reset)
+ break;
+ ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
+ if (ret)
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s error ieq cq destroy\n",
+ __func__);
+
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_DESTROY_CQ,
+ &compl_info);
+ if (ret)
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+ "%s error ieq qp destroy done\n",
+ __func__);
+ } while (0);
+
+ i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+ break;
+ default:
+ i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s error no resources\n", __func__);
+ break;
+ }
+ /* Free all allocated puda buffers for both tx and rx */
+ buf = rsrc->alloclist;
+ while (buf) {
+ nextbuf = buf->next;
+ i40iw_puda_dele_buf(dev, buf);
+ buf = nextbuf;
+ rsrc->alloc_buf_count--;
+ }
+ i40iw_free_virt_mem(dev->hw, vmem);
+}
+
+/**
+ * i40iw_puda_allocbufs - allocate buffers for resource
+ * @rsrc: resource for buffer allocation
+ * @count: number of buffers to create
+ */
+static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
+ u32 count)
+{
+ u32 i;
+ struct i40iw_puda_buf *buf;
+ struct i40iw_puda_buf *nextbuf;
+
+ for (i = 0; i < count; i++) {
+ buf = i40iw_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
+ if (!buf) {
+ rsrc->stats_buf_alloc_fail++;
+ return I40IW_ERR_NO_MEMORY;
+ }
+ i40iw_puda_ret_bufpool(rsrc, buf);
+ rsrc->alloc_buf_count++;
+ if (!rsrc->alloclist) {
+ rsrc->alloclist = buf;
+ } else {
+ nextbuf = rsrc->alloclist;
+ rsrc->alloclist = buf;
+ buf->next = nextbuf;
+ }
+ }
+ rsrc->avail_buf_count = rsrc->alloc_buf_count;
+ return 0;
+}
+
+/**
+ * i40iw_puda_create_rsrc - create resouce (ilq or ieq)
+ * @dev: iwarp device
+ * @info: resource information
+ */
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_rsrc_info *info)
+{
+ enum i40iw_status_code ret = 0;
+ struct i40iw_puda_rsrc *rsrc;
+ u32 pudasize;
+ u32 sqwridsize, rqwridsize;
+ struct i40iw_virt_mem *vmem;
+
+ info->count = 1;
+ pudasize = sizeof(struct i40iw_puda_rsrc);
+ sqwridsize = info->sq_size * sizeof(struct i40iw_sq_uk_wr_trk_info);
+ rqwridsize = info->rq_size * 8;
+ switch (info->type) {
+ case I40IW_PUDA_RSRC_TYPE_ILQ:
+ vmem = &dev->ilq_mem;
+ break;
+ case I40IW_PUDA_RSRC_TYPE_IEQ:
+ vmem = &dev->ieq_mem;
+ break;
+ default:
+ return I40IW_NOT_SUPPORTED;
+ }
+ ret =
+ i40iw_allocate_virt_mem(dev->hw, vmem,
+ pudasize + sqwridsize + rqwridsize);
+ if (ret)
+ return ret;
+ rsrc = (struct i40iw_puda_rsrc *)vmem->va;
+ spin_lock_init(&rsrc->bufpool_lock);
+ if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
+ dev->ilq = (struct i40iw_puda_rsrc *)vmem->va;
+ dev->ilq_count = info->count;
+ rsrc->receive = info->receive;
+ rsrc->xmit_complete = info->xmit_complete;
+ } else {
+ vmem = &dev->ieq_mem;
+ dev->ieq_count = info->count;
+ dev->ieq = (struct i40iw_puda_rsrc *)vmem->va;
+ rsrc->receive = i40iw_ieq_receive;
+ rsrc->xmit_complete = i40iw_ieq_tx_compl;
+ }
+
+ rsrc->type = info->type;
+ rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
+ rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
+ rsrc->mss = info->mss;
+ /* Initialize all ieq lists */
+ INIT_LIST_HEAD(&rsrc->bufpool);
+ INIT_LIST_HEAD(&rsrc->txpend);
+
+ rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
+ dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id);
+ rsrc->qp_id = info->qp_id;
+ rsrc->cq_id = info->cq_id;
+ rsrc->sq_size = info->sq_size;
+ rsrc->rq_size = info->rq_size;
+ rsrc->cq_size = info->rq_size + info->sq_size;
+ rsrc->buf_size = info->buf_size;
+ rsrc->dev = dev;
+
+ ret = i40iw_puda_cq_create(rsrc);
+ if (!ret) {
+ rsrc->completion = PUDA_CQ_CREATED;
+ ret = i40iw_puda_qp_create(rsrc);
+ }
+ if (ret) {
+ i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", __func__);
+ goto error;
+ }
+ rsrc->completion = PUDA_QP_CREATED;
+
+ ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
+ if (ret) {
+ i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error allloc_buf\n", __func__);
+ goto error;
+ }
+
+ rsrc->rxq_invalid_cnt = info->rq_size;
+ ret = i40iw_puda_replenish_rq(rsrc, true);
+ if (ret)
+ goto error;
+
+ if (info->type == I40IW_PUDA_RSRC_TYPE_IEQ) {
+ if (!i40iw_init_hash_desc(&rsrc->hash_desc)) {
+ rsrc->check_crc = true;
+ rsrc->completion = PUDA_HASH_CRC_COMPLETE;
+ ret = 0;
+ }
+ }
+
+ dev->ccq_ops->ccq_arm(&rsrc->cq);
+ return ret;
+ error:
+ i40iw_puda_dele_resources(dev, info->type, false);
+
+ return ret;
+}
+
+/**
+ * i40iw_ilq_putback_rcvbuf - ilq buffer to put back on rq
+ * @qp: ilq's qp resource
+ * @wqe_idx: wqe index of completed rcvbuf
+ */
+static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx)
+{
+ u64 *wqe;
+ u64 offset24;
+
+ wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+ get_64bit_val(wqe, 24, &offset24);
+ offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
+ set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * i40iw_ieq_get_fpdu - given length return fpdu length
+ * @length: length if fpdu
+ */
+static u16 i40iw_ieq_get_fpdu_length(u16 length)
+{
+ u16 fpdu_len;
+
+ fpdu_len = length + I40IW_IEQ_MPA_FRAMING;
+ fpdu_len = (fpdu_len + 3) & 0xfffffffc;
+ return fpdu_len;
+}
+
+/**
+ * i40iw_ieq_copy_to_txbuf - copydata from rcv buf to tx buf
+ * @buf: rcv buffer with partial
+ * @txbuf: tx buffer for sendign back
+ * @buf_offset: rcv buffer offset to copy from
+ * @txbuf_offset: at offset in tx buf to copy
+ * @length: length of data to copy
+ */
+static void i40iw_ieq_copy_to_txbuf(struct i40iw_puda_buf *buf,
+ struct i40iw_puda_buf *txbuf,
+ u16 buf_offset, u32 txbuf_offset,
+ u32 length)
+{
+ void *mem1 = (u8 *)buf->mem.va + buf_offset;
+ void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
+
+ memcpy(mem2, mem1, length);
+}
+
+/**
+ * i40iw_ieq_setup_tx_buf - setup tx buffer for partial handling
+ * @buf: reeive buffer with partial
+ * @txbuf: buffer to prepare
+ */
+static void i40iw_ieq_setup_tx_buf(struct i40iw_puda_buf *buf,
+ struct i40iw_puda_buf *txbuf)
+{
+ txbuf->maclen = buf->maclen;
+ txbuf->tcphlen = buf->tcphlen;
+ txbuf->ipv4 = buf->ipv4;
+ txbuf->hdrlen = buf->hdrlen;
+ i40iw_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
+}
+
+/**
+ * i40iw_ieq_check_first_buf - check if rcv buffer's seq is in range
+ * @buf: receive exception buffer
+ * @fps: first partial sequence number
+ */
+static void i40iw_ieq_check_first_buf(struct i40iw_puda_buf *buf, u32 fps)
+{
+ u32 offset;
+
+ if (buf->seqnum < fps) {
+ offset = fps - buf->seqnum;
+ if (offset > buf->datalen)
+ return;
+ buf->data += offset;
+ buf->datalen -= (u16)offset;
+ buf->seqnum = fps;
+ }
+}
+
+/**
+ * i40iw_ieq_compl_pfpdu - write txbuf with full fpdu
+ * @ieq: ieq resource
+ * @rxlist: ieq's received buffer list
+ * @pbufl: temporary list for buffers for fpddu
+ * @txbuf: tx buffer for fpdu
+ * @fpdu_len: total length of fpdu
+ */
+static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct i40iw_puda_buf *txbuf,
+ u16 fpdu_len)
+{
+ struct i40iw_puda_buf *buf;
+ u32 nextseqnum;
+ u16 txoffset, bufoffset;
+
+ buf = i40iw_puda_get_listbuf(pbufl);
+ nextseqnum = buf->seqnum + fpdu_len;
+ txbuf->totallen = buf->hdrlen + fpdu_len;
+ txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
+ i40iw_ieq_setup_tx_buf(buf, txbuf);
+
+ txoffset = buf->hdrlen;
+ bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+
+ do {
+ if (buf->datalen >= fpdu_len) {
+ /* copied full fpdu */
+ i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, fpdu_len);
+ buf->datalen -= fpdu_len;
+ buf->data += fpdu_len;
+ buf->seqnum = nextseqnum;
+ break;
+ }
+ /* copy partial fpdu */
+ i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, buf->datalen);
+ txoffset += buf->datalen;
+ fpdu_len -= buf->datalen;
+ i40iw_puda_ret_bufpool(ieq, buf);
+ buf = i40iw_puda_get_listbuf(pbufl);
+ bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+ } while (1);
+
+ /* last buffer on the list*/
+ if (buf->datalen)
+ list_add(&buf->list, rxlist);
+ else
+ i40iw_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * i40iw_ieq_create_pbufl - create buffer list for single fpdu
+ * @rxlist: resource list for receive ieq buffes
+ * @pbufl: temp. list for buffers for fpddu
+ * @buf: first receive buffer
+ * @fpdu_len: total length of fpdu
+ */
+static enum i40iw_status_code i40iw_ieq_create_pbufl(
+ struct i40iw_pfpdu *pfpdu,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct i40iw_puda_buf *buf,
+ u16 fpdu_len)
+{
+ enum i40iw_status_code status = 0;
+ struct i40iw_puda_buf *nextbuf;
+ u32 nextseqnum;
+ u16 plen = fpdu_len - buf->datalen;
+ bool done = false;
+
+ nextseqnum = buf->seqnum + buf->datalen;
+ do {
+ nextbuf = i40iw_puda_get_listbuf(rxlist);
+ if (!nextbuf) {
+ status = I40IW_ERR_list_empty;
+ break;
+ }
+ list_add_tail(&nextbuf->list, pbufl);
+ if (nextbuf->seqnum != nextseqnum) {
+ pfpdu->bad_seq_num++;
+ status = I40IW_ERR_SEQ_NUM;
+ break;
+ }
+ if (nextbuf->datalen >= plen) {
+ done = true;
+ } else {
+ plen -= nextbuf->datalen;
+ nextseqnum = nextbuf->seqnum + nextbuf->datalen;
+ }
+
+ } while (!done);
+
+ return status;
+}
+
+/**
+ * i40iw_ieq_handle_partial - process partial fpdu buffer
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ * @fpdu_len: fpdu len in the buffer
+ */
+static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *ieq,
+ struct i40iw_pfpdu *pfpdu,
+ struct i40iw_puda_buf *buf,
+ u16 fpdu_len)
+{
+ enum i40iw_status_code status = 0;
+ u8 *crcptr;
+ u32 mpacrc;
+ u32 seqnum = buf->seqnum;
+ struct list_head pbufl; /* partial buffer list */
+ struct i40iw_puda_buf *txbuf = NULL;
+ struct list_head *rxlist = &pfpdu->rxlist;
+
+ INIT_LIST_HEAD(&pbufl);
+ list_add(&buf->list, &pbufl);
+
+ status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
+ if (!status)
+ goto error;
+
+ txbuf = i40iw_puda_get_bufpool(ieq);
+ if (!txbuf) {
+ pfpdu->no_tx_bufs++;
+ status = I40IW_ERR_NO_TXBUFS;
+ goto error;
+ }
+
+ i40iw_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
+ i40iw_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
+ crcptr = txbuf->data + fpdu_len - 4;
+ mpacrc = *(u32 *)crcptr;
+ if (ieq->check_crc) {
+ status = i40iw_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
+ (fpdu_len - 4), mpacrc);
+ if (status) {
+ i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+ "%s: error bad crc\n", __func__);
+ goto error;
+ }
+ }
+
+ i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "IEQ TX BUFFER",
+ txbuf->mem.va, txbuf->totallen);
+ i40iw_puda_send_buf(ieq, txbuf);
+ pfpdu->rcv_nxt = seqnum + fpdu_len;
+ return status;
+ error:
+ while (!list_empty(&pbufl)) {
+ buf = (struct i40iw_puda_buf *)(pbufl.prev);
+ list_del(&buf->list);
+ list_add(&buf->list, rxlist);
+ }
+ if (txbuf)
+ i40iw_puda_ret_bufpool(ieq, txbuf);
+ return status;
+}
+
+/**
+ * i40iw_ieq_process_buf - process buffer rcvd for ieq
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ */
+static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
+ struct i40iw_pfpdu *pfpdu,
+ struct i40iw_puda_buf *buf)
+{
+ u16 fpdu_len = 0;
+ u16 datalen = buf->datalen;
+ u8 *datap = buf->data;
+ u8 *crcptr;
+ u16 ioffset = 0;
+ u32 mpacrc;
+ u32 seqnum = buf->seqnum;
+ u16 length = 0;
+ u16 full = 0;
+ bool partial = false;
+ struct i40iw_puda_buf *txbuf;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ enum i40iw_status_code ret = 0;
+ enum i40iw_status_code status = 0;
+
+ ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
+ while (datalen) {
+ fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap));
+ if (fpdu_len > pfpdu->max_fpdu_data) {
+ i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+ "%s: error bad fpdu_len\n", __func__);
+ status = I40IW_ERR_MPA_CRC;
+ list_add(&buf->list, rxlist);
+ return status;
+ }
+
+ if (datalen < fpdu_len) {
+ partial = true;
+ break;
+ }
+ crcptr = datap + fpdu_len - 4;
+ mpacrc = *(u32 *)crcptr;
+ if (ieq->check_crc)
+ ret = i40iw_ieq_check_mpacrc(ieq->hash_desc,
+ datap, fpdu_len - 4, mpacrc);
+ if (ret) {
+ status = I40IW_ERR_MPA_CRC;
+ list_add(&buf->list, rxlist);
+ return status;
+ }
+ full++;
+ pfpdu->fpdu_processed++;
+ datap += fpdu_len;
+ length += fpdu_len;
+ datalen -= fpdu_len;
+ }
+ if (full) {
+ /* copy full pdu's in the txbuf and send them out */
+ txbuf = i40iw_puda_get_bufpool(ieq);
+ if (!txbuf) {
+ pfpdu->no_tx_bufs++;
+ status = I40IW_ERR_NO_TXBUFS;
+ list_add(&buf->list, rxlist);
+ return status;
+ }
+ /* modify txbuf's buffer header */
+ i40iw_ieq_setup_tx_buf(buf, txbuf);
+ /* copy full fpdu's to new buffer */
+ i40iw_ieq_copy_to_txbuf(buf, txbuf, ioffset, buf->hdrlen,
+ length);
+ txbuf->totallen = buf->hdrlen + length;
+
+ i40iw_ieq_update_tcpip_info(txbuf, length, buf->seqnum);
+ i40iw_puda_send_buf(ieq, txbuf);
+
+ if (!datalen) {
+ pfpdu->rcv_nxt = buf->seqnum + length;
+ i40iw_puda_ret_bufpool(ieq, buf);
+ return status;
+ }
+ buf->data = datap;
+ buf->seqnum = seqnum + length;
+ buf->datalen = datalen;
+ pfpdu->rcv_nxt = buf->seqnum;
+ }
+ if (partial)
+ status = i40iw_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
+
+ return status;
+}
+
+/**
+ * i40iw_ieq_process_fpdus - process fpdu's buffers on its list
+ * @qp: qp for which partial fpdus
+ * @ieq: ieq resource
+ */
+static void i40iw_ieq_process_fpdus(struct i40iw_sc_qp *qp,
+ struct i40iw_puda_rsrc *ieq)
+{
+ struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ struct i40iw_puda_buf *buf;
+ enum i40iw_status_code status;
+
+ do {
+ if (list_empty(rxlist))
+ break;
+ buf = i40iw_puda_get_listbuf(rxlist);
+ if (!buf) {
+ i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+ "%s: error no buf\n", __func__);
+ break;
+ }
+ if (buf->seqnum != pfpdu->rcv_nxt) {
+ /* This could be out of order or missing packet */
+ pfpdu->out_of_order++;
+ list_add(&buf->list, rxlist);
+ break;
+ }
+ /* keep processing buffers from the head of the list */
+ status = i40iw_ieq_process_buf(ieq, pfpdu, buf);
+ if (status == I40IW_ERR_MPA_CRC) {
+ pfpdu->mpa_crc_err = true;
+ while (!list_empty(rxlist)) {
+ buf = i40iw_puda_get_listbuf(rxlist);
+ i40iw_puda_ret_bufpool(ieq, buf);
+ pfpdu->crc_err++;
+ }
+ /* create CQP for AE */
+ i40iw_ieq_mpa_crc_ae(ieq->dev, qp);
+ }
+ } while (!status);
+}
+
+/**
+ * i40iw_ieq_handle_exception - handle qp's exception
+ * @ieq: ieq resource
+ * @qp: qp receiving excpetion
+ * @buf: receive buffer
+ */
+static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
+ struct i40iw_sc_qp *qp,
+ struct i40iw_puda_buf *buf)
+{
+ struct i40iw_puda_buf *tmpbuf = NULL;
+ struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+ u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
+ u32 rcv_wnd = hw_host_ctx[23];
+ /* first partial seq # in q2 */
+ u32 fps = qp->q2_buf[16];
+ struct list_head *rxlist = &pfpdu->rxlist;
+ struct list_head *plist;
+
+ pfpdu->total_ieq_bufs++;
+
+ if (pfpdu->mpa_crc_err) {
+ pfpdu->crc_err++;
+ goto error;
+ }
+ if (pfpdu->mode && (fps != pfpdu->fps)) {
+ /* clean up qp as it is new partial sequence */
+ i40iw_ieq_cleanup_qp(ieq->dev, qp);
+ i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+ "%s: restarting new partial\n", __func__);
+ pfpdu->mode = false;
+ }
+
+ if (!pfpdu->mode) {
+ i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "Q2 BUFFER", (u64 *)qp->q2_buf, 128);
+ /* First_Partial_Sequence_Number check */
+ pfpdu->rcv_nxt = fps;
+ pfpdu->fps = fps;
+ pfpdu->mode = true;
+ pfpdu->max_fpdu_data = ieq->mss;
+ pfpdu->pmode_count++;
+ INIT_LIST_HEAD(rxlist);
+ i40iw_ieq_check_first_buf(buf, fps);
+ }
+
+ if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
+ pfpdu->bad_seq_num++;
+ goto error;
+ }
+
+ if (!list_empty(rxlist)) {
+ tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
+ plist = &tmpbuf->list;
+ while ((struct list_head *)tmpbuf != rxlist) {
+ if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
+ break;
+ tmpbuf = (struct i40iw_puda_buf *)plist->next;
+ }
+ /* Insert buf before tmpbuf */
+ list_add_tail(&buf->list, &tmpbuf->list);
+ } else {
+ list_add_tail(&buf->list, rxlist);
+ }
+ i40iw_ieq_process_fpdus(qp, ieq);
+ return;
+ error:
+ i40iw_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * i40iw_ieq_receive - received exception buffer
+ * @dev: iwarp device
+ * @buf: exception buffer received
+ */
+static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_buf *buf)
+{
+ struct i40iw_puda_rsrc *ieq = dev->ieq;
+ struct i40iw_sc_qp *qp = NULL;
+ u32 wqe_idx = ieq->compl_rxwqe_idx;
+
+ qp = i40iw_ieq_get_qp(dev, buf);
+ if (!qp) {
+ ieq->stats_bad_qp_id++;
+ i40iw_puda_ret_bufpool(ieq, buf);
+ } else {
+ i40iw_ieq_handle_exception(ieq, qp, buf);
+ }
+ /*
+ * ieq->rx_wqe_idx is used by i40iw_puda_replenish_rq()
+ * on which wqe_idx to start replenish rq
+ */
+ if (!ieq->rxq_invalid_cnt)
+ ieq->rx_wqe_idx = wqe_idx;
+ ieq->rxq_invalid_cnt++;
+}
+
+/**
+ * i40iw_ieq_tx_compl - put back after sending completed exception buffer
+ * @dev: iwarp device
+ * @sqwrid: pointer to puda buffer
+ */
+static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
+{
+ struct i40iw_puda_rsrc *ieq = dev->ieq;
+ struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
+
+ i40iw_puda_ret_bufpool(ieq, buf);
+ if (!list_empty(&ieq->txpend)) {
+ buf = i40iw_puda_get_listbuf(&ieq->txpend);
+ i40iw_puda_send_buf(ieq, buf);
+ }
+}
+
+/**
+ * i40iw_ieq_cleanup_qp - qp is being destroyed
+ * @dev: iwarp device
+ * @qp: all pending fpdu buffers
+ */
+void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_puda_buf *buf;
+ struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ struct i40iw_puda_rsrc *ieq = dev->ieq;
+
+ if (!pfpdu->mode)
+ return;
+ while (!list_empty(rxlist)) {
+ buf = i40iw_puda_get_listbuf(rxlist);
+ i40iw_puda_ret_bufpool(ieq, buf);
+ }
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
new file mode 100644
index 000000000000..52bf7826ce4e
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -0,0 +1,183 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_PUDA_H
+#define I40IW_PUDA_H
+
+#define I40IW_IEQ_MPA_FRAMING 6
+
+struct i40iw_sc_dev;
+struct i40iw_sc_qp;
+struct i40iw_sc_cq;
+
+enum puda_resource_type {
+ I40IW_PUDA_RSRC_TYPE_ILQ = 1,
+ I40IW_PUDA_RSRC_TYPE_IEQ
+};
+
+enum puda_rsrc_complete {
+ PUDA_CQ_CREATED = 1,
+ PUDA_QP_CREATED,
+ PUDA_TX_COMPLETE,
+ PUDA_RX_COMPLETE,
+ PUDA_HASH_CRC_COMPLETE
+};
+
+struct i40iw_puda_completion_info {
+ struct i40iw_qp_uk *qp;
+ u8 q_type;
+ u8 vlan_valid;
+ u8 l3proto;
+ u8 l4proto;
+ u16 payload_len;
+ u32 compl_error; /* No_err=0, else major and minor err code */
+ u32 qp_id;
+ u32 wqe_idx;
+};
+
+struct i40iw_puda_send_info {
+ u64 paddr; /* Physical address */
+ u32 len;
+ u8 tcplen;
+ u8 maclen;
+ bool ipv4;
+ bool doloopback;
+ void *scratch;
+};
+
+struct i40iw_puda_buf {
+ struct list_head list; /* MUST be first entry */
+ struct i40iw_dma_mem mem; /* DMA memory for the buffer */
+ struct i40iw_puda_buf *next; /* for alloclist in rsrc struct */
+ struct i40iw_virt_mem buf_mem; /* Buffer memory for this buffer */
+ void *scratch;
+ u8 *iph;
+ u8 *tcph;
+ u8 *data;
+ u16 datalen;
+ u16 vlan_id;
+ u8 tcphlen; /* tcp length in bytes */
+ u8 maclen; /* mac length in bytes */
+ u32 totallen; /* machlen+iphlen+tcphlen+datalen */
+ atomic_t refcount;
+ u8 hdrlen;
+ bool ipv4;
+ u32 seqnum;
+};
+
+struct i40iw_puda_rsrc_info {
+ enum puda_resource_type type; /* ILQ or IEQ */
+ u32 count;
+ u16 pd_id;
+ u32 cq_id;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u16 buf_size;
+ u16 mss;
+ u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
+ void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
+ void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+};
+
+struct i40iw_puda_rsrc {
+ struct i40iw_sc_cq cq;
+ struct i40iw_sc_qp qp;
+ struct i40iw_sc_pd sc_pd;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_dma_mem cqmem;
+ struct i40iw_dma_mem qpmem;
+ struct i40iw_virt_mem ilq_mem;
+ enum puda_rsrc_complete completion;
+ enum puda_resource_type type;
+ u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */
+ u16 mss;
+ u32 cq_id;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u32 cq_size;
+ struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 compl_rxwqe_idx;
+ u32 rx_wqe_idx;
+ u32 rxq_invalid_cnt;
+ u32 tx_wqe_avail_cnt;
+ bool check_crc;
+ struct shash_desc *hash_desc;
+ struct list_head txpend;
+ struct list_head bufpool; /* free buffers pool list for recv and xmit */
+ u32 alloc_buf_count;
+ u32 avail_buf_count; /* snapshot of currently available buffers */
+ spinlock_t bufpool_lock;
+ struct i40iw_puda_buf *alloclist;
+ void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
+ void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+ /* puda stats */
+ u64 stats_buf_alloc_fail;
+ u64 stats_pkt_rcvd;
+ u64 stats_pkt_sent;
+ u64 stats_rcvd_pkt_err;
+ u64 stats_sent_pkt_q;
+ u64 stats_bad_qp_id;
+};
+
+struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc);
+void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
+ struct i40iw_puda_buf *buf);
+void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc,
+ struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
+ struct i40iw_puda_send_info *info);
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_rsrc_info *info);
+void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+ enum puda_resource_type type,
+ bool reset);
+enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_cq *cq, u32 *compl_err);
+void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+ struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
+ void *addr, u32 length, u32 value);
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc);
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+void i40iw_free_hash_desc(struct shash_desc *desc);
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
+ u32 seqnum);
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_register.h b/drivers/infiniband/hw/i40iw/i40iw_register.h
new file mode 100644
index 000000000000..57768184e251
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_register.h
@@ -0,0 +1,1030 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_REGISTER_H
+#define I40IW_REGISTER_H
+
+#define I40E_GLGEN_STAT 0x000B612C /* Reset: POR */
+
+#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
+#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_PDINV_PMSDIDX_MASK (0xFFF << I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
+#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
+#define I40E_PFHMC_PDINV_PMPDIDX_MASK (0x1FF << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT 31
+#define I40E_PFHMC_SDCMD_PMSDWR_MASK (0x1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT 0
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT 1
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK (0x3FF << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
+
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTLN_INTENA_MASK (0x1 << I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK (0x1 << I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+
+#define I40E_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
+#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT 15
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_MASK (0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT)
+#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT 4
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK (0x3 << I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_DREVID 0x0009C480 /* Reset: PCIR */
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK 0xFF
+
+#define I40E_PFPE_AEQALLOC 0x00131180 /* Reset: PFR */
+#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_PFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_PFPE_CCQPHIGH 0x00008200 /* Reset: PFR */
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_PFPE_CCQPLOW 0x00008180 /* Reset: PFR */
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_PFPE_CCQPSTATUS 0x00008100 /* Reset: PFR */
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_MASK (0x7 << I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_MASK (0x3F << I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_PFPE_CQACK 0x00131100 /* Reset: PFR */
+#define I40E_PFPE_CQACK_PECQID_SHIFT 0
+#define I40E_PFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_PFPE_CQACK_PECQID_SHIFT)
+#define I40E_PFPE_CQARM 0x00131080 /* Reset: PFR */
+#define I40E_PFPE_CQARM_PECQID_SHIFT 0
+#define I40E_PFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_PFPE_CQARM_PECQID_SHIFT)
+#define I40E_PFPE_CQPDB 0x00008000 /* Reset: PFR */
+#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_PFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_PFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_PFPE_CQPERRCODES 0x00008880 /* Reset: PFR */
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_PFPE_CQPTAIL 0x00008080 /* Reset: PFR */
+#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT 0
+#define I40E_PFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_PFPE_FLMQ1ALLOCERR 0x00008980 /* Reset: PFR */
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_FLMXMITALLOCERR 0x00008900 /* Reset: PFR */
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_IPCONFIG0 0x00008280 /* Reset: PFR */
+#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT 0
+#define I40E_PFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_PFPE_MRTEIDXMASK 0x00008600 /* Reset: PFR */
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_PFPE_RCVUNEXPECTEDERROR 0x00008680 /* Reset: PFR */
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_PFPE_TCPNOWTIMER 0x00008580 /* Reset: PFR */
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+
+#define I40E_PFPE_WQEALLOC 0x00138C00 /* Reset: PFR */
+#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT 0
+#define I40E_PFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+
+#define I40E_VFPE_AEQALLOC(_VF) (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC_MAX_INDEX 127
+#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH(_VF) (0x00001000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH_MAX_INDEX 127
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW(_VF) (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW_MAX_INDEX 127
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS(_VF) (0x00000800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS_MAX_INDEX 127
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_MASK (0x7 << I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_MASK (0x3F << I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK(_VF) (0x00130800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQACK_MAX_INDEX 127
+#define I40E_VFPE_CQACK_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK_PECQID_SHIFT)
+#define I40E_VFPE_CQARM(_VF) (0x00130400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQARM_MAX_INDEX 127
+#define I40E_VFPE_CQARM_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB(_VF) (0x00000000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPDB_MAX_INDEX 127
+#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES(_VF) (0x00001800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES_MAX_INDEX 127
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL(_VF) (0x00000400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL_MAX_INDEX 127
+#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT 0
+#define I40E_VFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG0(_VF) (0x00001400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG0_MAX_INDEX 127
+#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT 0
+#define I40E_VFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK(_VF) (0x00003000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX 127
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF) (0x00003400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX 127
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER(_VF) (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX 127
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC(_VF) (0x00138000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC_MAX_INDEX 127
+#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT 0
+#define I40E_VFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+
+#define I40E_GLPE_CPUSTATUS0 0x0000D040 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
+#define I40E_GLPE_CPUSTATUS1 0x0000D044 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
+#define I40E_GLPE_CPUSTATUS2 0x0000D048 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
+#define I40E_GLPE_CPUTRIG0 0x0000D060 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT 0
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_MASK (0xFFFF << I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT 17
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_MASK (0x1 << I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT 18
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_MASK (0x1 << I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT)
+#define I40E_GLPE_DUAL40_RUPM 0x0000DA04 /* Reset: PE_CORER */
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT 0
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_MASK (0x1 << I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT)
+#define I40E_GLPE_PFAEQEDROPCNT(_i) (0x00131440 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFAEQEDROPCNT_MAX_INDEX 15
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCEQEDROPCNT(_i) (0x001313C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCEQEDROPCNT_MAX_INDEX 15
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCQEDROPCNT(_i) (0x00131340 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCQEDROPCNT_MAX_INDEX 15
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_RUPM_CQPPOOL 0x0000DACC /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_MASK (0xFF << I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT)
+#define I40E_GLPE_RUPM_FLRPOOL 0x0000DAC4 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_MASK (0xFF << I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL 0x0000DA00 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT 0
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_MASK (0xFF << I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT 26
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT 27
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT 28
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT 29
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT 30
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT 31
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_MASK (0x1 << I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT)
+#define I40E_GLPE_RUPM_PTXPOOL 0x0000DAC8 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_MASK (0xFF << I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT)
+#define I40E_GLPE_RUPM_PUSHPOOL 0x0000DAC0 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_MASK (0xFF << I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT)
+#define I40E_GLPE_RUPM_TXHOST_EN 0x0000DA08 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT 0
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_MASK (0x1 << I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT)
+#define I40E_GLPE_VFAEQEDROPCNT(_i) (0x00132540 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFAEQEDROPCNT_MAX_INDEX 31
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCEQEDROPCNT(_i) (0x00132440 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCEQEDROPCNT_MAX_INDEX 31
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCQEDROPCNT(_i) (0x00132340 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCQEDROPCNT_MAX_INDEX 31
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX 31
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT 8
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMQ1ALLOCERR(_i) (0x0000C700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX 31
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFFLMXMITALLOCERR(_i) (0x0000C600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX 31
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFUDACTRL(_i) (0x0000C000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDACTRL_MAX_INDEX 31
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT 0
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT 1
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT 2
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT 3
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK (0x1 << I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN(_i) (0x0000C100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX 31
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT 0
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK (0x3FFFF << I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK (0x1 << I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
+
+#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSHI(_i) (0x00010804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSHI(_i) (0x00010A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSHI(_i) (0x00010C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSHI(_i) (0x00010204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSHI(_i) (0x00010404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSHI(_i) (0x00011E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSHI(_i) (0x00012004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSHI(_i) (0x00012204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSHI(_i) (0x00011A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSHI(_i) (0x00011C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSHI(_i) (0x00011404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSHI(_i) (0x00011604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSHI(_i) (0x00011804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSHI(_i) (0x00010E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSHI(_i) (0x00011004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSHI(_i) (0x00012804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSHI(_i) (0x00012A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSHI(_i) (0x00012C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSHI(_i) (0x00012404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSHI(_i) (0x00012604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSHI(_i) (0x00013E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSHI(_i) (0x00014004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSHI(_i) (0x00013C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSHI(_i) (0x00014404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSHI(_i) (0x00014604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSHI(_i) (0x00014204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDHI(_i) (0x00014804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_PFRDMAVINVHI(_i) (0x00014A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRXVLANERR_MAX_INDEX 15
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSHI(_i) (0x00013004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGHI(_i) (0x00013404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX 15
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX 15
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSHI(_i) (0x00013804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSHI(_i) (0x00013A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSHI 0x0001E014 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSLO 0x0001E010 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPHI 0x0001E01C /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPLO 0x0001E018 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
+#define I40E_GLPES_RDMARXOOONOMARK 0x0001E004 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
+#define I40E_GLPES_RDMARXUNALIGN 0x0001E000 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLEHI 0x0001E044 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLELO 0x0001E040 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLEHI 0x0001E02C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLELO 0x0001E028 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKHI 0x0001E024 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKSLO 0x0001E020 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLEHI 0x0001E03C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLELO 0x0001E038 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLEHI 0x0001E034 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLELO 0x0001E030 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTHI 0x0001E04C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTLO 0x0001E048 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTHI 0x0001E054 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTLO 0x0001E050 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSHI 0x0001E05C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSLO 0x0001E058 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXDISCARD(_i) (0x00018600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSHI(_i) (0x00018804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSLO(_i) (0x00018800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSHI(_i) (0x00018A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSLO(_i) (0x00018A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSHI(_i) (0x00018C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSLO(_i) (0x00018C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSHI(_i) (0x00018204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSLO(_i) (0x00018200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSHI(_i) (0x00018404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSLO(_i) (0x00018400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXTRUNC(_i) (0x00018700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSHI(_i) (0x00019E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSLO(_i) (0x00019E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSHI(_i) (0x0001A004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSLO(_i) (0x0001A000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSHI(_i) (0x0001A204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSLO(_i) (0x0001A200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXNOROUTE(_i) (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSHI(_i) (0x00019A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSLO(_i) (0x00019A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSHI(_i) (0x00019C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSLO(_i) (0x00019C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXDISCARD(_i) (0x00019200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSHI(_i) (0x00019404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSLO(_i) (0x00019400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSHI(_i) (0x00019604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSLO(_i) (0x00019600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSHI(_i) (0x00019804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSLO(_i) (0x00019800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSHI(_i) (0x00018E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSLO(_i) (0x00018E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSHI(_i) (0x00019004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSLO(_i) (0x00019000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXTRUNC(_i) (0x00019300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSHI(_i) (0x0001A804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSLO(_i) (0x0001A800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSHI(_i) (0x0001AA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSLO(_i) (0x0001AA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSHI(_i) (0x0001AC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSLO(_i) (0x0001AC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXNOROUTE(_i) (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSHI(_i) (0x0001A404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSLO(_i) (0x0001A400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSHI(_i) (0x0001A604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSLO(_i) (0x0001A600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSHI(_i) (0x0001BE04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSLO(_i) (0x0001BE00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSHI(_i) (0x0001C004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSLO(_i) (0x0001C000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSHI(_i) (0x0001BC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSLO(_i) (0x0001BC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSHI(_i) (0x0001C404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSLO(_i) (0x0001C400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSHI(_i) (0x0001C604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSLO(_i) (0x0001C600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSHI(_i) (0x0001C204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSLO(_i) (0x0001C200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDHI(_i) (0x0001C804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDLO(_i) (0x0001C800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_VFRDMAVINVHI(_i) (0x0001CA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_VFRDMAVINVLO(_i) (0x0001CA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_VFRXVLANERR(_i) (0x00018000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRXVLANERR_MAX_INDEX 31
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_VFTCPRTXSEG(_i) (0x0001B600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_VFTCPRXOPTERR(_i) (0x0001B200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_VFTCPRXPROTOERR(_i) (0x0001B300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSHI(_i) (0x0001B004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSLO(_i) (0x0001B000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGHI(_i) (0x0001B404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX 31
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGLO(_i) (0x0001B400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX 31
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSHI(_i) (0x0001B804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSLO(_i) (0x0001B800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSHI(_i) (0x0001BA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSLO(_i) (0x0001BA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+
+#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT 0
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK (0x7 << I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK (0x3F << I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT 31
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
+#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK1_PECQID_SHIFT)
+#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
+#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM1_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
+#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB1_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT 0
+#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG01 0x00008C00 /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT 0
+#define I40E_VFPE_IPCONFIG01_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK1 0x00009000 /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR1 0x00009400 /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER1 0x0000A800 /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT 0
+#define I40E_VFPE_WQEALLOC1_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
+#endif /* I40IW_REGISTER_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
new file mode 100644
index 000000000000..b0110c15e044
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_status.h
@@ -0,0 +1,100 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_STATUS_H
+#define I40IW_STATUS_H
+
+/* Error Codes */
+enum i40iw_status_code {
+ I40IW_SUCCESS = 0,
+ I40IW_ERR_NVM = -1,
+ I40IW_ERR_NVM_CHECKSUM = -2,
+ I40IW_ERR_CONFIG = -4,
+ I40IW_ERR_PARAM = -5,
+ I40IW_ERR_DEVICE_NOT_SUPPORTED = -6,
+ I40IW_ERR_RESET_FAILED = -7,
+ I40IW_ERR_SWFW_SYNC = -8,
+ I40IW_ERR_NO_MEMORY = -9,
+ I40IW_ERR_BAD_PTR = -10,
+ I40IW_ERR_INVALID_PD_ID = -11,
+ I40IW_ERR_INVALID_QP_ID = -12,
+ I40IW_ERR_INVALID_CQ_ID = -13,
+ I40IW_ERR_INVALID_CEQ_ID = -14,
+ I40IW_ERR_INVALID_AEQ_ID = -15,
+ I40IW_ERR_INVALID_SIZE = -16,
+ I40IW_ERR_INVALID_ARP_INDEX = -17,
+ I40IW_ERR_INVALID_FPM_FUNC_ID = -18,
+ I40IW_ERR_QP_INVALID_MSG_SIZE = -19,
+ I40IW_ERR_QP_TOOMANY_WRS_POSTED = -20,
+ I40IW_ERR_INVALID_FRAG_COUNT = -21,
+ I40IW_ERR_QUEUE_EMPTY = -22,
+ I40IW_ERR_INVALID_ALIGNMENT = -23,
+ I40IW_ERR_FLUSHED_QUEUE = -24,
+ I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25,
+ I40IW_ERR_INVALID_IMM_DATA_SIZE = -26,
+ I40IW_ERR_TIMEOUT = -27,
+ I40IW_ERR_OPCODE_MISMATCH = -28,
+ I40IW_ERR_CQP_COMPL_ERROR = -29,
+ I40IW_ERR_INVALID_VF_ID = -30,
+ I40IW_ERR_INVALID_HMCFN_ID = -31,
+ I40IW_ERR_BACKING_PAGE_ERROR = -32,
+ I40IW_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
+ I40IW_ERR_INVALID_PBLE_INDEX = -34,
+ I40IW_ERR_INVALID_SD_INDEX = -35,
+ I40IW_ERR_INVALID_PAGE_DESC_INDEX = -36,
+ I40IW_ERR_INVALID_SD_TYPE = -37,
+ I40IW_ERR_MEMCPY_FAILED = -38,
+ I40IW_ERR_INVALID_HMC_OBJ_INDEX = -39,
+ I40IW_ERR_INVALID_HMC_OBJ_COUNT = -40,
+ I40IW_ERR_INVALID_SRQ_ARM_LIMIT = -41,
+ I40IW_ERR_SRQ_ENABLED = -42,
+ I40IW_ERR_BUF_TOO_SHORT = -43,
+ I40IW_ERR_BAD_IWARP_CQE = -44,
+ I40IW_ERR_NVM_BLANK_MODE = -45,
+ I40IW_ERR_NOT_IMPLEMENTED = -46,
+ I40IW_ERR_PE_DOORBELL_NOT_ENABLED = -47,
+ I40IW_ERR_NOT_READY = -48,
+ I40IW_NOT_SUPPORTED = -49,
+ I40IW_ERR_FIRMWARE_API_VERSION = -50,
+ I40IW_ERR_RING_FULL = -51,
+ I40IW_ERR_MPA_CRC = -61,
+ I40IW_ERR_NO_TXBUFS = -62,
+ I40IW_ERR_SEQ_NUM = -63,
+ I40IW_ERR_list_empty = -64,
+ I40IW_ERR_INVALID_MAC_ADDR = -65,
+ I40IW_ERR_BAD_STAG = -66,
+ I40IW_ERR_CQ_COMPL_ERROR = -67,
+
+};
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
new file mode 100644
index 000000000000..edb3a8c8267a
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -0,0 +1,1312 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_TYPE_H
+#define I40IW_TYPE_H
+#include "i40iw_user.h"
+#include "i40iw_hmc.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+struct i40iw_cqp_sq_wqe {
+ u64 buf[I40IW_CQP_WQE_SIZE];
+};
+
+struct i40iw_sc_aeqe {
+ u64 buf[I40IW_AEQE_SIZE];
+};
+
+struct i40iw_ceqe {
+ u64 buf[I40IW_CEQE_SIZE];
+};
+
+struct i40iw_cqp_ctx {
+ u64 buf[I40IW_CQP_CTX_SIZE];
+};
+
+struct i40iw_cq_shadow_area {
+ u64 buf[I40IW_SHADOW_AREA_SIZE];
+};
+
+struct i40iw_sc_dev;
+struct i40iw_hmc_info;
+struct i40iw_dev_pestat;
+
+struct i40iw_cqp_ops;
+struct i40iw_ccq_ops;
+struct i40iw_ceq_ops;
+struct i40iw_aeq_ops;
+struct i40iw_mr_ops;
+struct i40iw_cqp_misc_ops;
+struct i40iw_pd_ops;
+struct i40iw_priv_qp_ops;
+struct i40iw_priv_cq_ops;
+struct i40iw_hmc_ops;
+
+enum i40iw_resource_indicator_type {
+ I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
+ I40IW_RSRC_INDICATOR_TYPE_CQ,
+ I40IW_RSRC_INDICATOR_TYPE_QP,
+ I40IW_RSRC_INDICATOR_TYPE_SRQ
+};
+
+enum i40iw_hdrct_flags {
+ DDP_LEN_FLAG = 0x80,
+ DDP_HDR_FLAG = 0x40,
+ RDMA_HDR_FLAG = 0x20
+};
+
+enum i40iw_term_layers {
+ LAYER_RDMA = 0,
+ LAYER_DDP = 1,
+ LAYER_MPA = 2
+};
+
+enum i40iw_term_error_types {
+ RDMAP_REMOTE_PROT = 1,
+ RDMAP_REMOTE_OP = 2,
+ DDP_CATASTROPHIC = 0,
+ DDP_TAGGED_BUFFER = 1,
+ DDP_UNTAGGED_BUFFER = 2,
+ DDP_LLP = 3
+};
+
+enum i40iw_term_rdma_errors {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_INV_BOUNDS = 0x01,
+ RDMAP_ACCESS = 0x02,
+ RDMAP_UNASSOC_STAG = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_RDMAP_VER = 0x05,
+ RDMAP_UNEXPECTED_OP = 0x06,
+ RDMAP_CATASTROPHIC_LOCAL = 0x07,
+ RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum i40iw_term_ddp_errors {
+ DDP_CATASTROPHIC_LOCAL = 0x00,
+ DDP_TAGGED_INV_STAG = 0x00,
+ DDP_TAGGED_BOUNDS = 0x01,
+ DDP_TAGGED_UNASSOC_STAG = 0x02,
+ DDP_TAGGED_TO_WRAP = 0x03,
+ DDP_TAGGED_INV_DDP_VER = 0x04,
+ DDP_UNTAGGED_INV_QN = 0x01,
+ DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+ DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+ DDP_UNTAGGED_INV_MO = 0x04,
+ DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+ DDP_UNTAGGED_INV_DDP_VER = 0x06
+};
+
+enum i40iw_term_mpa_errors {
+ MPA_CLOSED = 0x01,
+ MPA_CRC = 0x02,
+ MPA_MARKER = 0x03,
+ MPA_REQ_RSP = 0x04,
+};
+
+enum i40iw_flush_opcode {
+ FLUSH_INVALID = 0,
+ FLUSH_PROT_ERR,
+ FLUSH_REM_ACCESS_ERR,
+ FLUSH_LOC_QP_OP_ERR,
+ FLUSH_REM_OP_ERR,
+ FLUSH_LOC_LEN_ERR,
+ FLUSH_GENERAL_ERR,
+ FLUSH_FATAL_ERR
+};
+
+enum i40iw_term_eventtypes {
+ TERM_EVENT_QP_FATAL,
+ TERM_EVENT_QP_ACCESS_ERR
+};
+
+struct i40iw_terminate_hdr {
+ u8 layer_etype;
+ u8 error_code;
+ u8 hdrct;
+ u8 rsvd;
+};
+
+enum i40iw_debug_flag {
+ I40IW_DEBUG_NONE = 0x00000000,
+ I40IW_DEBUG_ERR = 0x00000001,
+ I40IW_DEBUG_INIT = 0x00000002,
+ I40IW_DEBUG_DEV = 0x00000004,
+ I40IW_DEBUG_CM = 0x00000008,
+ I40IW_DEBUG_VERBS = 0x00000010,
+ I40IW_DEBUG_PUDA = 0x00000020,
+ I40IW_DEBUG_ILQ = 0x00000040,
+ I40IW_DEBUG_IEQ = 0x00000080,
+ I40IW_DEBUG_QP = 0x00000100,
+ I40IW_DEBUG_CQ = 0x00000200,
+ I40IW_DEBUG_MR = 0x00000400,
+ I40IW_DEBUG_PBLE = 0x00000800,
+ I40IW_DEBUG_WQE = 0x00001000,
+ I40IW_DEBUG_AEQ = 0x00002000,
+ I40IW_DEBUG_CQP = 0x00004000,
+ I40IW_DEBUG_HMC = 0x00008000,
+ I40IW_DEBUG_USER = 0x00010000,
+ I40IW_DEBUG_VIRT = 0x00020000,
+ I40IW_DEBUG_DCB = 0x00040000,
+ I40IW_DEBUG_CQE = 0x00800000,
+ I40IW_DEBUG_ALL = 0xFFFFFFFF
+};
+
+enum i40iw_hw_stat_index_32b {
+ I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
+ I40IW_HW_STAT_INDEX_IP4RXTRUNC,
+ I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
+ I40IW_HW_STAT_INDEX_IP6RXDISCARD,
+ I40IW_HW_STAT_INDEX_IP6RXTRUNC,
+ I40IW_HW_STAT_INDEX_IP6TXNOROUTE,
+ I40IW_HW_STAT_INDEX_TCPRTXSEG,
+ I40IW_HW_STAT_INDEX_TCPRXOPTERR,
+ I40IW_HW_STAT_INDEX_TCPRXPROTOERR,
+ I40IW_HW_STAT_INDEX_MAX_32
+};
+
+enum i40iw_hw_stat_index_64b {
+ I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
+ I40IW_HW_STAT_INDEX_IP4RXPKTS,
+ I40IW_HW_STAT_INDEX_IP4RXFRAGS,
+ I40IW_HW_STAT_INDEX_IP4RXMCPKTS,
+ I40IW_HW_STAT_INDEX_IP4TXOCTS,
+ I40IW_HW_STAT_INDEX_IP4TXPKTS,
+ I40IW_HW_STAT_INDEX_IP4TXFRAGS,
+ I40IW_HW_STAT_INDEX_IP4TXMCPKTS,
+ I40IW_HW_STAT_INDEX_IP6RXOCTS,
+ I40IW_HW_STAT_INDEX_IP6RXPKTS,
+ I40IW_HW_STAT_INDEX_IP6RXFRAGS,
+ I40IW_HW_STAT_INDEX_IP6RXMCPKTS,
+ I40IW_HW_STAT_INDEX_IP6TXOCTS,
+ I40IW_HW_STAT_INDEX_IP6TXPKTS,
+ I40IW_HW_STAT_INDEX_IP6TXFRAGS,
+ I40IW_HW_STAT_INDEX_IP6TXMCPKTS,
+ I40IW_HW_STAT_INDEX_TCPRXSEGS,
+ I40IW_HW_STAT_INDEX_TCPTXSEG,
+ I40IW_HW_STAT_INDEX_RDMARXRDS,
+ I40IW_HW_STAT_INDEX_RDMARXSNDS,
+ I40IW_HW_STAT_INDEX_RDMARXWRS,
+ I40IW_HW_STAT_INDEX_RDMATXRDS,
+ I40IW_HW_STAT_INDEX_RDMATXSNDS,
+ I40IW_HW_STAT_INDEX_RDMATXWRS,
+ I40IW_HW_STAT_INDEX_RDMAVBND,
+ I40IW_HW_STAT_INDEX_RDMAVINV,
+ I40IW_HW_STAT_INDEX_MAX_64
+};
+
+struct i40iw_dev_hw_stat_offsets {
+ u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
+ u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
+};
+
+struct i40iw_dev_hw_stats {
+ u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32];
+ u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64];
+};
+
+struct i40iw_device_pestat_ops {
+ void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool);
+ void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *);
+ void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *);
+ void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *);
+ void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *);
+};
+
+struct i40iw_dev_pestat {
+ struct i40iw_hw *hw;
+ struct i40iw_device_pestat_ops ops;
+ struct i40iw_dev_hw_stats hw_stats;
+ struct i40iw_dev_hw_stats last_read_hw_stats;
+ struct i40iw_dev_hw_stat_offsets hw_stat_offsets;
+ struct timer_list stats_timer;
+ spinlock_t stats_lock; /* rdma stats lock */
+};
+
+struct i40iw_hw {
+ u8 __iomem *hw_addr;
+ void *dev_context;
+ struct i40iw_hmc_info hmc;
+};
+
+struct i40iw_pfpdu {
+ struct list_head rxlist;
+ u32 rcv_nxt;
+ u32 fps;
+ u32 max_fpdu_data;
+ bool mode;
+ bool mpa_crc_err;
+ u64 total_ieq_bufs;
+ u64 fpdu_processed;
+ u64 bad_seq_num;
+ u64 crc_err;
+ u64 no_tx_bufs;
+ u64 tx_err;
+ u64 out_of_order;
+ u64 pmode_count;
+};
+
+struct i40iw_sc_pd {
+ u32 size;
+ struct i40iw_sc_dev *dev;
+ u16 pd_id;
+};
+
+struct i40iw_cqp_quanta {
+ u64 elem[I40IW_CQP_WQE_SIZE];
+};
+
+struct i40iw_sc_cqp {
+ u32 size;
+ u64 sq_pa;
+ u64 host_ctx_pa;
+ void *back_cqp;
+ struct i40iw_sc_dev *dev;
+ enum i40iw_status_code (*process_cqp_sds)(struct i40iw_sc_dev *,
+ struct i40iw_update_sds_info *);
+ struct i40iw_dma_mem sdbuf;
+ struct i40iw_ring sq_ring;
+ struct i40iw_cqp_quanta *sq_base;
+ u64 *host_ctx;
+ u64 *scratch_array;
+ u32 cqp_id;
+ u32 sq_size;
+ u32 hw_sq_size;
+ u8 struct_ver;
+ u8 polarity;
+ bool en_datacenter_tcp;
+ u8 hmc_profile;
+ u8 enabled_vf_count;
+ u8 timeout_count;
+};
+
+struct i40iw_sc_aeq {
+ u32 size;
+ u64 aeq_elem_pa;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_sc_aeqe *aeqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ struct i40iw_ring aeq_ring;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ u8 polarity;
+};
+
+struct i40iw_sc_ceq {
+ u32 size;
+ u64 ceq_elem_pa;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_ceqe *ceqe_base;
+ void *pbl_list;
+ u32 ceq_id;
+ u32 elem_cnt;
+ struct i40iw_ring ceq_ring;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ bool tph_en;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+ u8 polarity;
+};
+
+struct i40iw_sc_cq {
+ struct i40iw_cq_uk cq_uk;
+ u64 cq_pa;
+ u64 shadow_area_pa;
+ struct i40iw_sc_dev *dev;
+ void *pbl_list;
+ void *back_cq;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ bool ceqe_mask;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ u8 cq_type;
+ bool ceq_id_valid;
+ bool tph_en;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+ bool check_overflow;
+};
+
+struct i40iw_sc_qp {
+ struct i40iw_qp_uk qp_uk;
+ u64 sq_pa;
+ u64 rq_pa;
+ u64 hw_host_ctx_pa;
+ u64 shadow_area_pa;
+ u64 q2_pa;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_sc_pd *pd;
+ u64 *hw_host_ctx;
+ void *llp_stream_handle;
+ void *back_qp;
+ struct i40iw_pfpdu pfpdu;
+ u8 *q2_buf;
+ u64 qp_compl_ctx;
+ u16 qs_handle;
+ u16 exception_lan_queue;
+ u16 push_idx;
+ u8 sq_tph_val;
+ u8 rq_tph_val;
+ u8 qp_state;
+ u8 qp_type;
+ u8 hw_sq_size;
+ u8 hw_rq_size;
+ u8 src_mac_addr_idx;
+ bool sq_tph_en;
+ bool rq_tph_en;
+ bool rcv_tph_en;
+ bool xmit_tph_en;
+ bool virtual_map;
+ bool flush_sq;
+ bool flush_rq;
+ bool sq_flush;
+ enum i40iw_flush_opcode flush_code;
+ enum i40iw_term_eventtypes eventtype;
+ u8 term_flags;
+};
+
+struct i40iw_hmc_fpm_misc {
+ u32 max_ceqs;
+ u32 max_sds;
+ u32 xf_block_size;
+ u32 q1_block_size;
+ u32 ht_multiplier;
+ u32 timer_bucket;
+};
+
+struct i40iw_vchnl_if {
+ enum i40iw_status_code (*vchnl_recv)(struct i40iw_sc_dev *, u32, u8 *, u16);
+ enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *dev, u32, u8 *, u16);
+};
+
+#define I40IW_VCHNL_MAX_VF_MSG_SIZE 512
+
+struct i40iw_vchnl_vf_msg_buffer {
+ struct i40iw_virtchnl_op_buf vchnl_msg;
+ char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
+};
+
+struct i40iw_vfdev {
+ struct i40iw_sc_dev *pf_dev;
+ u8 *hmc_info_mem;
+ struct i40iw_dev_pestat dev_pestat;
+ struct i40iw_hmc_pble_info *pble_info;
+ struct i40iw_hmc_info hmc_info;
+ struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
+ u64 fpm_query_buf_pa;
+ u64 *fpm_query_buf;
+ u32 vf_id;
+ u32 msg_count;
+ bool pf_hmc_initialized;
+ u16 pmf_index;
+ u16 iw_vf_idx; /* VF Device table index */
+ bool stats_initialized;
+};
+
+struct i40iw_sc_dev {
+ struct list_head cqp_cmd_head; /* head of the CQP command list */
+ spinlock_t cqp_lock; /* cqp list sync */
+ struct i40iw_dev_uk dev_uk;
+ struct i40iw_dev_pestat dev_pestat;
+ struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
+ u64 fpm_query_buf_pa;
+ u64 fpm_commit_buf_pa;
+ u64 *fpm_query_buf;
+ u64 *fpm_commit_buf;
+ void *back_dev;
+ struct i40iw_hw *hw;
+ u8 __iomem *db_addr;
+ struct i40iw_hmc_info *hmc_info;
+ struct i40iw_hmc_pble_info *pble_info;
+ struct i40iw_vfdev *vf_dev[I40IW_MAX_PE_ENABLED_VF_COUNT];
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_sc_aeq *aeq;
+ struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT];
+ struct i40iw_sc_cq *ccq;
+ struct i40iw_cqp_ops *cqp_ops;
+ struct i40iw_ccq_ops *ccq_ops;
+ struct i40iw_ceq_ops *ceq_ops;
+ struct i40iw_aeq_ops *aeq_ops;
+ struct i40iw_pd_ops *iw_pd_ops;
+ struct i40iw_priv_qp_ops *iw_priv_qp_ops;
+ struct i40iw_priv_cq_ops *iw_priv_cq_ops;
+ struct i40iw_mr_ops *mr_ops;
+ struct i40iw_cqp_misc_ops *cqp_misc_ops;
+ struct i40iw_hmc_ops *hmc_ops;
+ struct i40iw_vchnl_if vchnl_if;
+ u32 ilq_count;
+ struct i40iw_virt_mem ilq_mem;
+ struct i40iw_puda_rsrc *ilq;
+ u32 ieq_count;
+ struct i40iw_virt_mem ieq_mem;
+ struct i40iw_puda_rsrc *ieq;
+
+ struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
+
+ struct i40iw_hmc_fpm_misc hmc_fpm_misc;
+ u16 qs_handle;
+ u32 debug_mask;
+ u16 exception_lan_queue;
+ u8 hmc_fn_id;
+ bool is_pf;
+ bool vchnl_up;
+ u8 vf_id;
+ u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
+ struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
+ u8 hw_rev;
+};
+
+struct i40iw_modify_cq_info {
+ u64 cq_pa;
+ struct i40iw_cqe *cq_base;
+ void *pbl_list;
+ u32 ceq_id;
+ u32 cq_size;
+ u32 shadow_read_threshold;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ bool check_overflow;
+ bool cq_resize;
+ bool ceq_change;
+ bool check_overflow_change;
+ u32 first_pm_pbl_idx;
+ bool ceq_valid;
+};
+
+struct i40iw_create_qp_info {
+ u8 next_iwarp_state;
+ bool ord_valid;
+ bool tcp_ctx_valid;
+ bool cq_num_valid;
+ bool static_rsrc;
+ bool arp_cache_idx_valid;
+};
+
+struct i40iw_modify_qp_info {
+ u64 rx_win0;
+ u64 rx_win1;
+ u16 new_mss;
+ u8 next_iwarp_state;
+ u8 termlen;
+ bool ord_valid;
+ bool tcp_ctx_valid;
+ bool cq_num_valid;
+ bool static_rsrc;
+ bool arp_cache_idx_valid;
+ bool reset_tcp_conn;
+ bool remove_hash_idx;
+ bool dont_send_term;
+ bool dont_send_fin;
+ bool cached_var_valid;
+ bool mss_change;
+ bool force_loopback;
+};
+
+struct i40iw_ccq_cqe_info {
+ struct i40iw_sc_cqp *cqp;
+ u64 scratch;
+ u32 op_ret_val;
+ u16 maj_err_code;
+ u16 min_err_code;
+ u8 op_code;
+ bool error;
+};
+
+struct i40iw_l2params {
+ u16 qs_handle_list[I40IW_MAX_USER_PRIORITY];
+ u16 mss;
+};
+
+struct i40iw_device_init_info {
+ u64 fpm_query_buf_pa;
+ u64 fpm_commit_buf_pa;
+ u64 *fpm_query_buf;
+ u64 *fpm_commit_buf;
+ struct i40iw_hw *hw;
+ void __iomem *bar0;
+ enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
+ u16 qs_handle;
+ u16 exception_lan_queue;
+ u8 hmc_fn_id;
+ bool is_pf;
+ u32 debug_mask;
+};
+
+enum i40iw_cqp_hmc_profile {
+ I40IW_HMC_PROFILE_DEFAULT = 1,
+ I40IW_HMC_PROFILE_FAVOR_VF = 2,
+ I40IW_HMC_PROFILE_EQUAL = 3,
+};
+
+struct i40iw_cqp_init_info {
+ u64 cqp_compl_ctx;
+ u64 host_ctx_pa;
+ u64 sq_pa;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_cqp_quanta *sq;
+ u64 *host_ctx;
+ u64 *scratch_array;
+ u32 sq_size;
+ u8 struct_ver;
+ bool en_datacenter_tcp;
+ u8 hmc_profile;
+ u8 enabled_vf_count;
+};
+
+struct i40iw_ceq_init_info {
+ u64 ceqe_pa;
+ struct i40iw_sc_dev *dev;
+ u64 *ceqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ u32 ceq_id;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ bool tph_en;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+};
+
+struct i40iw_aeq_init_info {
+ u64 aeq_elem_pa;
+ struct i40iw_sc_dev *dev;
+ u32 *aeqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+};
+
+struct i40iw_ccq_init_info {
+ u64 cq_pa;
+ u64 shadow_area_pa;
+ struct i40iw_sc_dev *dev;
+ struct i40iw_cqe *cq_base;
+ u64 *shadow_area;
+ void *pbl_list;
+ u32 num_elem;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ bool ceqe_mask;
+ bool ceq_id_valid;
+ bool tph_en;
+ u8 tph_val;
+ bool avoid_mem_cflct;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+};
+
+struct i40iwarp_offload_info {
+ u16 rcv_mark_offset;
+ u16 snd_mark_offset;
+ u16 pd_id;
+ u8 ddp_ver;
+ u8 rdmap_ver;
+ u8 ord_size;
+ u8 ird_size;
+ bool wr_rdresp_en;
+ bool rd_enable;
+ bool snd_mark_en;
+ bool rcv_mark_en;
+ bool bind_en;
+ bool fast_reg_en;
+ bool priv_mode_en;
+ bool lsmm_present;
+ u8 iwarp_mode;
+ bool align_hdrs;
+ bool rcv_no_mpa_crc;
+
+ u8 last_byte_sent;
+};
+
+struct i40iw_tcp_offload_info {
+ bool ipv4;
+ bool no_nagle;
+ bool insert_vlan_tag;
+ bool time_stamp;
+ u8 cwnd_inc_limit;
+ bool drop_ooo_seg;
+ bool dup_ack_thresh;
+ u8 ttl;
+ u8 src_mac_addr_idx;
+ bool avoid_stretch_ack;
+ u8 tos;
+ u16 src_port;
+ u16 dst_port;
+ u32 dest_ip_addr0;
+ u32 dest_ip_addr1;
+ u32 dest_ip_addr2;
+ u32 dest_ip_addr3;
+ u32 snd_mss;
+ u16 vlan_tag;
+ u16 arp_idx;
+ u32 flow_label;
+ bool wscale;
+ u8 tcp_state;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+ u32 time_stamp_recent;
+ u32 time_stamp_age;
+ u32 snd_nxt;
+ u32 snd_wnd;
+ u32 rcv_nxt;
+ u32 rcv_wnd;
+ u32 snd_max;
+ u32 snd_una;
+ u32 srtt;
+ u32 rtt_var;
+ u32 ss_thresh;
+ u32 cwnd;
+ u32 snd_wl1;
+ u32 snd_wl2;
+ u32 max_snd_window;
+ u8 rexmit_thresh;
+ u32 local_ipaddr0;
+ u32 local_ipaddr1;
+ u32 local_ipaddr2;
+ u32 local_ipaddr3;
+ bool ignore_tcp_opt;
+ bool ignore_tcp_uns_opt;
+};
+
+struct i40iw_qp_host_ctx_info {
+ u64 qp_compl_ctx;
+ struct i40iw_tcp_offload_info *tcp_info;
+ struct i40iwarp_offload_info *iwarp_info;
+ u32 send_cq_num;
+ u32 rcv_cq_num;
+ u16 push_idx;
+ bool push_mode_en;
+ bool tcp_info_valid;
+ bool iwarp_info_valid;
+ bool err_rq_idx_valid;
+ u16 err_rq_idx;
+};
+
+struct i40iw_aeqe_info {
+ u64 compl_ctx;
+ u32 qp_cq_id;
+ u16 ae_id;
+ u16 wqe_idx;
+ u8 tcp_state;
+ u8 iwarp_state;
+ bool qp;
+ bool cq;
+ bool sq;
+ bool in_rdrsp_wr;
+ bool out_rdrsp;
+ u8 q2_data_written;
+ bool aeqe_overflow;
+};
+
+struct i40iw_allocate_stag_info {
+ u64 total_len;
+ u32 chunk_size;
+ u32 stag_idx;
+ u32 page_size;
+ u16 pd_id;
+ u16 access_rights;
+ bool remote_access;
+ bool use_hmc_fcn_index;
+ u8 hmc_fcn_index;
+ bool use_pf_rid;
+};
+
+struct i40iw_reg_ns_stag_info {
+ u64 reg_addr_pa;
+ u64 fbo;
+ void *va;
+ u64 total_len;
+ u32 page_size;
+ u32 chunk_size;
+ u32 first_pm_pbl_index;
+ enum i40iw_addressing_type addr_type;
+ i40iw_stag_index stag_idx;
+ u16 access_rights;
+ u16 pd_id;
+ i40iw_stag_key stag_key;
+ bool use_hmc_fcn_index;
+ u8 hmc_fcn_index;
+ bool use_pf_rid;
+};
+
+struct i40iw_fast_reg_stag_info {
+ u64 wr_id;
+ u64 reg_addr_pa;
+ u64 fbo;
+ void *va;
+ u64 total_len;
+ u32 page_size;
+ u32 chunk_size;
+ u32 first_pm_pbl_index;
+ enum i40iw_addressing_type addr_type;
+ i40iw_stag_index stag_idx;
+ u16 access_rights;
+ u16 pd_id;
+ i40iw_stag_key stag_key;
+ bool local_fence;
+ bool read_fence;
+ bool signaled;
+ bool use_hmc_fcn_index;
+ u8 hmc_fcn_index;
+ bool use_pf_rid;
+ bool defer_flag;
+};
+
+struct i40iw_dealloc_stag_info {
+ u32 stag_idx;
+ u16 pd_id;
+ bool mr;
+ bool dealloc_pbl;
+};
+
+struct i40iw_register_shared_stag {
+ void *va;
+ enum i40iw_addressing_type addr_type;
+ i40iw_stag_index new_stag_idx;
+ i40iw_stag_index parent_stag_idx;
+ u32 access_rights;
+ u16 pd_id;
+ i40iw_stag_key new_stag_key;
+};
+
+struct i40iw_qp_init_info {
+ struct i40iw_qp_uk_init_info qp_uk_init_info;
+ struct i40iw_sc_pd *pd;
+ u64 *host_ctx;
+ u8 *q2;
+ u64 sq_pa;
+ u64 rq_pa;
+ u64 host_ctx_pa;
+ u64 q2_pa;
+ u64 shadow_area_pa;
+ u8 sq_tph_val;
+ u8 rq_tph_val;
+ u8 type;
+ bool sq_tph_en;
+ bool rq_tph_en;
+ bool rcv_tph_en;
+ bool xmit_tph_en;
+ bool virtual_map;
+};
+
+struct i40iw_cq_init_info {
+ struct i40iw_sc_dev *dev;
+ u64 cq_base_pa;
+ u64 shadow_area_pa;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ bool virtual_map;
+ bool ceqe_mask;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ bool ceq_id_valid;
+ bool tph_en;
+ u8 tph_val;
+ u8 type;
+ struct i40iw_cq_uk_init_info cq_uk_init_info;
+};
+
+struct i40iw_upload_context_info {
+ u64 buf_pa;
+ bool freeze_qp;
+ bool raw_format;
+ u32 qp_id;
+ u8 qp_type;
+};
+
+struct i40iw_add_arp_cache_entry_info {
+ u8 mac_addr[6];
+ u32 reach_max;
+ u16 arp_index;
+ bool permanent;
+};
+
+struct i40iw_apbvt_info {
+ u16 port;
+ bool add;
+};
+
+enum i40iw_quad_entry_type {
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED = 1,
+ I40IW_QHASH_TYPE_TCP_SYN,
+};
+
+enum i40iw_quad_hash_manage_type {
+ I40IW_QHASH_MANAGE_TYPE_DELETE = 0,
+ I40IW_QHASH_MANAGE_TYPE_ADD,
+ I40IW_QHASH_MANAGE_TYPE_MODIFY
+};
+
+struct i40iw_qhash_table_info {
+ enum i40iw_quad_hash_manage_type manage;
+ enum i40iw_quad_entry_type entry_type;
+ bool vlan_valid;
+ bool ipv4_valid;
+ u8 mac_addr[6];
+ u16 vlan_id;
+ u16 qs_handle;
+ u32 qp_num;
+ u32 dest_ip[4];
+ u32 src_ip[4];
+ u32 dest_port;
+ u32 src_port;
+};
+
+struct i40iw_local_mac_ipaddr_entry_info {
+ u8 mac_addr[6];
+ u8 entry_idx;
+};
+
+struct i40iw_cqp_manage_push_page_info {
+ u32 push_idx;
+ u16 qs_handle;
+ u8 free_page;
+};
+
+struct i40iw_qp_flush_info {
+ u16 sq_minor_code;
+ u16 sq_major_code;
+ u16 rq_minor_code;
+ u16 rq_major_code;
+ u16 ae_code;
+ u8 ae_source;
+ bool sq;
+ bool rq;
+ bool userflushcode;
+ bool generate_ae;
+};
+
+struct i40iw_cqp_commit_fpm_values {
+ u64 qp_base;
+ u64 cq_base;
+ u32 hte_base;
+ u32 arp_base;
+ u32 apbvt_inuse_base;
+ u32 mr_base;
+ u32 xf_base;
+ u32 xffl_base;
+ u32 q1_base;
+ u32 q1fl_base;
+ u32 fsimc_base;
+ u32 fsiav_base;
+ u32 pbl_base;
+
+ u32 qp_cnt;
+ u32 cq_cnt;
+ u32 hte_cnt;
+ u32 arp_cnt;
+ u32 mr_cnt;
+ u32 xf_cnt;
+ u32 xffl_cnt;
+ u32 q1_cnt;
+ u32 q1fl_cnt;
+ u32 fsimc_cnt;
+ u32 fsiav_cnt;
+ u32 pbl_cnt;
+};
+
+struct i40iw_cqp_query_fpm_values {
+ u16 first_pe_sd_index;
+ u32 qp_objsize;
+ u32 cq_objsize;
+ u32 hte_objsize;
+ u32 arp_objsize;
+ u32 mr_objsize;
+ u32 xf_objsize;
+ u32 q1_objsize;
+ u32 fsimc_objsize;
+ u32 fsiav_objsize;
+
+ u32 qp_max;
+ u32 cq_max;
+ u32 hte_max;
+ u32 arp_max;
+ u32 mr_max;
+ u32 xf_max;
+ u32 xffl_max;
+ u32 q1_max;
+ u32 q1fl_max;
+ u32 fsimc_max;
+ u32 fsiav_max;
+ u32 pbl_max;
+};
+
+struct i40iw_cqp_ops {
+ enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
+ struct i40iw_cqp_init_info *);
+ enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *);
+ void (*cqp_post_sq)(struct i40iw_sc_cqp *);
+ u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
+ enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
+ enum i40iw_status_code (*poll_for_cqp_op_done)(struct i40iw_sc_cqp *, u8,
+ struct i40iw_ccq_cqe_info *);
+};
+
+struct i40iw_ccq_ops {
+ enum i40iw_status_code (*ccq_init)(struct i40iw_sc_cq *,
+ struct i40iw_ccq_init_info *);
+ enum i40iw_status_code (*ccq_create)(struct i40iw_sc_cq *, u64, bool, bool);
+ enum i40iw_status_code (*ccq_destroy)(struct i40iw_sc_cq *, u64, bool);
+ enum i40iw_status_code (*ccq_create_done)(struct i40iw_sc_cq *);
+ enum i40iw_status_code (*ccq_get_cqe_info)(struct i40iw_sc_cq *,
+ struct i40iw_ccq_cqe_info *);
+ void (*ccq_arm)(struct i40iw_sc_cq *);
+};
+
+struct i40iw_ceq_ops {
+ enum i40iw_status_code (*ceq_init)(struct i40iw_sc_ceq *,
+ struct i40iw_ceq_init_info *);
+ enum i40iw_status_code (*ceq_create)(struct i40iw_sc_ceq *, u64, bool);
+ enum i40iw_status_code (*cceq_create_done)(struct i40iw_sc_ceq *);
+ enum i40iw_status_code (*cceq_destroy_done)(struct i40iw_sc_ceq *);
+ enum i40iw_status_code (*cceq_create)(struct i40iw_sc_ceq *, u64);
+ enum i40iw_status_code (*ceq_destroy)(struct i40iw_sc_ceq *, u64, bool);
+ void *(*process_ceq)(struct i40iw_sc_dev *, struct i40iw_sc_ceq *);
+};
+
+struct i40iw_aeq_ops {
+ enum i40iw_status_code (*aeq_init)(struct i40iw_sc_aeq *,
+ struct i40iw_aeq_init_info *);
+ enum i40iw_status_code (*aeq_create)(struct i40iw_sc_aeq *, u64, bool);
+ enum i40iw_status_code (*aeq_destroy)(struct i40iw_sc_aeq *, u64, bool);
+ enum i40iw_status_code (*get_next_aeqe)(struct i40iw_sc_aeq *,
+ struct i40iw_aeqe_info *);
+ enum i40iw_status_code (*repost_aeq_entries)(struct i40iw_sc_dev *, u32);
+ enum i40iw_status_code (*aeq_create_done)(struct i40iw_sc_aeq *);
+ enum i40iw_status_code (*aeq_destroy_done)(struct i40iw_sc_aeq *);
+};
+
+struct i40iw_pd_ops {
+ void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16);
+};
+
+struct i40iw_priv_qp_ops {
+ enum i40iw_status_code (*qp_init)(struct i40iw_sc_qp *, struct i40iw_qp_init_info *);
+ enum i40iw_status_code (*qp_create)(struct i40iw_sc_qp *,
+ struct i40iw_create_qp_info *, u64, bool);
+ enum i40iw_status_code (*qp_modify)(struct i40iw_sc_qp *,
+ struct i40iw_modify_qp_info *, u64, bool);
+ enum i40iw_status_code (*qp_destroy)(struct i40iw_sc_qp *, u64, bool, bool, bool);
+ enum i40iw_status_code (*qp_flush_wqes)(struct i40iw_sc_qp *,
+ struct i40iw_qp_flush_info *, u64, bool);
+ enum i40iw_status_code (*qp_upload_context)(struct i40iw_sc_dev *,
+ struct i40iw_upload_context_info *,
+ u64, bool);
+ enum i40iw_status_code (*qp_setctx)(struct i40iw_sc_qp *, u64 *,
+ struct i40iw_qp_host_ctx_info *);
+
+ void (*qp_send_lsmm)(struct i40iw_sc_qp *, void *, u32, i40iw_stag);
+ void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
+ void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
+ enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
+};
+
+struct i40iw_priv_cq_ops {
+ enum i40iw_status_code (*cq_init)(struct i40iw_sc_cq *, struct i40iw_cq_init_info *);
+ enum i40iw_status_code (*cq_create)(struct i40iw_sc_cq *, u64, bool, bool);
+ enum i40iw_status_code (*cq_destroy)(struct i40iw_sc_cq *, u64, bool);
+ enum i40iw_status_code (*cq_modify)(struct i40iw_sc_cq *,
+ struct i40iw_modify_cq_info *, u64, bool);
+};
+
+struct i40iw_mr_ops {
+ enum i40iw_status_code (*alloc_stag)(struct i40iw_sc_dev *,
+ struct i40iw_allocate_stag_info *, u64, bool);
+ enum i40iw_status_code (*mr_reg_non_shared)(struct i40iw_sc_dev *,
+ struct i40iw_reg_ns_stag_info *,
+ u64, bool);
+ enum i40iw_status_code (*mr_reg_shared)(struct i40iw_sc_dev *,
+ struct i40iw_register_shared_stag *,
+ u64, bool);
+ enum i40iw_status_code (*dealloc_stag)(struct i40iw_sc_dev *,
+ struct i40iw_dealloc_stag_info *,
+ u64, bool);
+ enum i40iw_status_code (*query_stag)(struct i40iw_sc_dev *, u64, u32, bool);
+ enum i40iw_status_code (*mw_alloc)(struct i40iw_sc_dev *, u64, u32, u16, bool);
+};
+
+struct i40iw_cqp_misc_ops {
+ enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *,
+ struct i40iw_cqp_manage_push_page_info *,
+ u64, bool);
+ enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *,
+ u64, u8, bool, bool);
+ enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *,
+ u64, u8, u8, bool, bool);
+ enum i40iw_status_code (*commit_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
+ struct i40iw_dma_mem *, bool, u8);
+ enum i40iw_status_code (*query_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
+ struct i40iw_dma_mem *, bool, u8);
+ enum i40iw_status_code (*static_hmc_pages_allocated)(struct i40iw_sc_cqp *,
+ u64, u8, bool, bool);
+ enum i40iw_status_code (*add_arp_cache_entry)(struct i40iw_sc_cqp *,
+ struct i40iw_add_arp_cache_entry_info *,
+ u64, bool);
+ enum i40iw_status_code (*del_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
+ enum i40iw_status_code (*query_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
+ enum i40iw_status_code (*manage_apbvt_entry)(struct i40iw_sc_cqp *,
+ struct i40iw_apbvt_info *, u64, bool);
+ enum i40iw_status_code (*manage_qhash_table_entry)(struct i40iw_sc_cqp *,
+ struct i40iw_qhash_table_info *, u64, bool);
+ enum i40iw_status_code (*alloc_local_mac_ipaddr_table_entry)(struct i40iw_sc_cqp *, u64, bool);
+ enum i40iw_status_code (*add_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *,
+ struct i40iw_local_mac_ipaddr_entry_info *,
+ u64, bool);
+ enum i40iw_status_code (*del_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *, u64, u8, u8, bool);
+ enum i40iw_status_code (*cqp_nop)(struct i40iw_sc_cqp *, u64, bool);
+ enum i40iw_status_code (*commit_fpm_values_done)(struct i40iw_sc_cqp
+ *);
+ enum i40iw_status_code (*query_fpm_values_done)(struct i40iw_sc_cqp *);
+ enum i40iw_status_code (*manage_hmc_pm_func_table_done)(struct i40iw_sc_cqp *);
+ enum i40iw_status_code (*update_suspend_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
+ enum i40iw_status_code (*update_resume_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
+};
+
+struct i40iw_hmc_ops {
+ enum i40iw_status_code (*init_iw_hmc)(struct i40iw_sc_dev *, u8);
+ enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
+ struct i40iw_hmc_fpm_misc *);
+ enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
+ enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *);
+ enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_create_obj_info *);
+ enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_del_obj_info *,
+ bool reset);
+ enum i40iw_status_code (*pf_init_vfhmc)(struct i40iw_sc_dev *, u8, u32 *);
+ enum i40iw_status_code (*vf_configure_vffpm)(struct i40iw_sc_dev *, u32 *);
+};
+
+struct cqp_info {
+ union {
+ struct {
+ struct i40iw_sc_qp *qp;
+ struct i40iw_create_qp_info info;
+ u64 scratch;
+ } qp_create;
+
+ struct {
+ struct i40iw_sc_qp *qp;
+ struct i40iw_modify_qp_info info;
+ u64 scratch;
+ } qp_modify;
+
+ struct {
+ struct i40iw_sc_qp *qp;
+ u64 scratch;
+ bool remove_hash_idx;
+ bool ignore_mw_bnd;
+ } qp_destroy;
+
+ struct {
+ struct i40iw_sc_cq *cq;
+ u64 scratch;
+ bool check_overflow;
+ } cq_create;
+
+ struct {
+ struct i40iw_sc_cq *cq;
+ u64 scratch;
+ } cq_destroy;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_allocate_stag_info info;
+ u64 scratch;
+ } alloc_stag;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ u64 scratch;
+ u32 mw_stag_index;
+ u16 pd_id;
+ } mw_alloc;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_reg_ns_stag_info info;
+ u64 scratch;
+ } mr_reg_non_shared;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_dealloc_stag_info info;
+ u64 scratch;
+ } dealloc_stag;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_local_mac_ipaddr_entry_info info;
+ u64 scratch;
+ } add_local_mac_ipaddr_entry;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_add_arp_cache_entry_info info;
+ u64 scratch;
+ } add_arp_cache_entry;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ u64 scratch;
+ u8 entry_idx;
+ u8 ignore_ref_count;
+ } del_local_mac_ipaddr_entry;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ u64 scratch;
+ u16 arp_index;
+ } del_arp_cache_entry;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_manage_vf_pble_info info;
+ u64 scratch;
+ } manage_vf_pble_bp;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_cqp_manage_push_page_info info;
+ u64 scratch;
+ } manage_push_page;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_upload_context_info info;
+ u64 scratch;
+ } qp_upload_context;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ u64 scratch;
+ } alloc_local_mac_ipaddr_entry;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_hmc_fcn_info info;
+ u64 scratch;
+ } manage_hmc_pm;
+
+ struct {
+ struct i40iw_sc_ceq *ceq;
+ u64 scratch;
+ } ceq_create;
+
+ struct {
+ struct i40iw_sc_ceq *ceq;
+ u64 scratch;
+ } ceq_destroy;
+
+ struct {
+ struct i40iw_sc_aeq *aeq;
+ u64 scratch;
+ } aeq_create;
+
+ struct {
+ struct i40iw_sc_aeq *aeq;
+ u64 scratch;
+ } aeq_destroy;
+
+ struct {
+ struct i40iw_sc_qp *qp;
+ struct i40iw_qp_flush_info info;
+ u64 scratch;
+ } qp_flush_wqes;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ void *fpm_values_va;
+ u64 fpm_values_pa;
+ u8 hmc_fn_id;
+ u64 scratch;
+ } query_fpm_values;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ void *fpm_values_va;
+ u64 fpm_values_pa;
+ u8 hmc_fn_id;
+ u64 scratch;
+ } commit_fpm_values;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_apbvt_info info;
+ u64 scratch;
+ } manage_apbvt_entry;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_qhash_table_info info;
+ u64 scratch;
+ } manage_qhash_table_entry;
+
+ struct {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_update_sds_info info;
+ u64 scratch;
+ } update_pe_sds;
+
+ struct {
+ struct i40iw_sc_cqp *cqp;
+ struct i40iw_sc_qp *qp;
+ u64 scratch;
+ } suspend_resume;
+ } u;
+};
+
+struct cqp_commands_info {
+ struct list_head cqp_cmd_entry;
+ u8 cqp_cmd;
+ u8 post_sq;
+ struct cqp_info in;
+};
+
+struct i40iw_virtchnl_work_info {
+ void (*callback_fcn)(void *vf_dev);
+ void *worker_vf_dev;
+};
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
new file mode 100644
index 000000000000..12acd688def4
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2006 - 2016 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef I40IW_USER_CONTEXT_H
+#define I40IW_USER_CONTEXT_H
+
+#include <linux/types.h>
+
+#define I40IW_ABI_USERSPACE_VER 4
+#define I40IW_ABI_KERNEL_VER 4
+struct i40iw_alloc_ucontext_req {
+ __u32 reserved32;
+ __u8 userspace_ver;
+ __u8 reserved8[3];
+};
+
+struct i40iw_alloc_ucontext_resp {
+ __u32 max_pds; /* maximum pds allowed for this user process */
+ __u32 max_qps; /* maximum qps allowed for this user process */
+ __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */
+ __u8 kernel_ver;
+ __u8 reserved[3];
+};
+
+struct i40iw_alloc_pd_resp {
+ __u32 pd_id;
+ __u8 reserved[4];
+};
+
+struct i40iw_create_cq_req {
+ __u64 user_cq_buffer;
+ __u64 user_shadow_area;
+};
+
+struct i40iw_create_qp_req {
+ __u64 user_wqe_buffers;
+ __u64 user_compl_ctx;
+
+ /* UDA QP PHB */
+ __u64 user_sq_phb; /* place for VA of the sq phb buff */
+ __u64 user_rq_phb; /* place for VA of the rq phb buff */
+};
+
+enum i40iw_memreg_type {
+ IW_MEMREG_TYPE_MEM = 0x0000,
+ IW_MEMREG_TYPE_QP = 0x0001,
+ IW_MEMREG_TYPE_CQ = 0x0002,
+};
+
+struct i40iw_mem_reg_req {
+ __u16 reg_type; /* Memory, QP or CQ */
+ __u16 cq_pages;
+ __u16 rq_pages;
+ __u16 sq_pages;
+};
+
+struct i40iw_create_cq_resp {
+ __u32 cq_id;
+ __u32 cq_size;
+ __u32 mmap_db_index;
+ __u32 reserved;
+};
+
+struct i40iw_create_qp_resp {
+ __u32 qp_id;
+ __u32 actual_sq_size;
+ __u32 actual_rq_size;
+ __u32 i40iw_drv_opt;
+ __u16 push_idx;
+ __u8 lsmm;
+ __u8 rsvd2;
+};
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
new file mode 100644
index 000000000000..f78c3dc8bdb2
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -0,0 +1,1204 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_status.h"
+#include "i40iw_d.h"
+#include "i40iw_user.h"
+#include "i40iw_register.h"
+
+static u32 nop_signature = 0x55550000;
+
+/**
+ * i40iw_nop_1 - insert a nop wqe and move head. no post work
+ * @qp: hw qp ptr
+ */
+static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
+{
+ u64 header, *wqe;
+ u64 *wqe_0 = NULL;
+ u32 wqe_idx, peek_head;
+ bool signaled = false;
+
+ if (!qp->sq_ring.head)
+ return I40IW_ERR_PARAM;
+
+ wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ wqe = qp->sq_base[wqe_idx].elem;
+ peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
+ wqe_0 = qp->sq_base[peek_head].elem;
+ if (peek_head)
+ wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+ else
+ wqe_0[3] = LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+ LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID) | nop_signature++;
+
+ wmb(); /* Memory barrier to ensure data is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+ return 0;
+}
+
+/**
+ * i40iw_qp_post_wr - post wr to hrdware
+ * @qp: hw qp ptr
+ */
+void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
+{
+ u64 temp;
+ u32 hw_sq_tail;
+ u32 sw_sq_head;
+
+ mb(); /* valid bit is written and loads completed before reading shadow */
+
+ /* read the doorbell shadow area */
+ get_64bit_val(qp->shadow_area, 0, &temp);
+
+ hw_sq_tail = (u32)RS_64(temp, I40IW_QP_DBSA_HW_SQ_TAIL);
+ sw_sq_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ if (sw_sq_head != hw_sq_tail) {
+ if (sw_sq_head > qp->initial_ring.head) {
+ if ((hw_sq_tail >= qp->initial_ring.head) &&
+ (hw_sq_tail < sw_sq_head)) {
+ writel(qp->qp_id, qp->wqe_alloc_reg);
+ }
+ } else if (sw_sq_head != qp->initial_ring.head) {
+ if ((hw_sq_tail >= qp->initial_ring.head) ||
+ (hw_sq_tail < sw_sq_head)) {
+ writel(qp->qp_id, qp->wqe_alloc_reg);
+ }
+ }
+ }
+
+ qp->initial_ring.head = qp->sq_ring.head;
+}
+
+/**
+ * i40iw_qp_ring_push_db - ring qp doorbell
+ * @qp: hw qp ptr
+ * @wqe_idx: wqe index
+ */
+static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
+{
+ set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
+ qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+}
+
+/**
+ * i40iw_qp_get_next_send_wqe - return next wqe ptr
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @wqe_size: size of sq wqe
+ */
+u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
+ u32 *wqe_idx,
+ u8 wqe_size)
+{
+ u64 *wqe = NULL;
+ u64 wqe_ptr;
+ u32 peek_head = 0;
+ u16 offset;
+ enum i40iw_status_code ret_code = 0;
+ u8 nop_wqe_cnt = 0, i;
+ u64 *wqe_0 = NULL;
+
+ *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ptr = (uintptr_t)qp->sq_base[*wqe_idx].elem;
+ offset = (u16)(wqe_ptr) & 0x7F;
+ if ((offset + wqe_size) > I40IW_QP_WQE_MAX_SIZE) {
+ nop_wqe_cnt = (u8)(I40IW_QP_WQE_MAX_SIZE - offset) / I40IW_QP_WQE_MIN_SIZE;
+ for (i = 0; i < nop_wqe_cnt; i++) {
+ i40iw_nop_1(qp);
+ I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return NULL;
+ }
+
+ *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ }
+ for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
+ I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return NULL;
+ }
+
+ wqe = qp->sq_base[*wqe_idx].elem;
+
+ peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ wqe_0 = qp->sq_base[peek_head].elem;
+ if (peek_head & 0x3)
+ wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+ return wqe;
+}
+
+/**
+ * i40iw_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ */
+static void i40iw_set_fragment(u64 *wqe, u32 offset, struct i40iw_sge *sge)
+{
+ if (sge) {
+ set_64bit_val(wqe, offset, LS_64(sge->tag_off, I40IWQPSQ_FRAG_TO));
+ set_64bit_val(wqe, (offset + 8),
+ (LS_64(sge->len, I40IWQPSQ_FRAG_LEN) |
+ LS_64(sge->stag, I40IWQPSQ_FRAG_STAG)));
+ }
+}
+
+/**
+ * i40iw_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ */
+u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
+{
+ u64 *wqe = NULL;
+ enum i40iw_status_code ret_code;
+
+ if (I40IW_RING_FULL_ERR(qp->rq_ring))
+ return NULL;
+
+ I40IW_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+ if (!*wqe_idx)
+ qp->rwqe_polarity = !qp->rwqe_polarity;
+ /* rq_wqe_size_multiplier is no of qwords in one rq wqe */
+ wqe = qp->rq_base[*wqe_idx * (qp->rq_wqe_size_multiplier >> 2)].elem;
+
+ return wqe;
+}
+
+/**
+ * i40iw_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ bool post_sq)
+{
+ u64 header;
+ u64 *wqe;
+ struct i40iw_rdma_write *op_info;
+ u32 i, wqe_idx;
+ u32 total_size = 0, byte_off;
+ enum i40iw_status_code ret_code;
+ bool read_fence = false;
+ u8 wqe_size;
+
+ op_info = &info->op.rdma_write;
+ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].len;
+
+ if (total_size > I40IW_MAX_OUTBOUND_MESSAGE_SIZE)
+ return I40IW_ERR_QP_INVALID_MSG_SIZE;
+
+ read_fence |= info->read_fence;
+
+ ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_lo_sges, &wqe_size);
+ if (ret_code)
+ return ret_code;
+
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+ set_64bit_val(wqe, 16,
+ LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+ if (!op_info->rem_addr.stag)
+ return I40IW_ERR_BAD_STAG;
+
+ header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+ LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+ LS_64((op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0), I40IWQPSQ_ADDFRAGCNT) |
+ LS_64(read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
+
+ for (i = 1; i < op_info->num_lo_sges; i++) {
+ byte_off = 32 + (i - 1) * 16;
+ i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
+ }
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * i40iw_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ bool inv_stag,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_rdma_read *op_info;
+ u64 header;
+ u32 wqe_idx;
+ enum i40iw_status_code ret_code;
+ u8 wqe_size;
+ bool local_fence = false;
+
+ op_info = &info->op.rdma_read;
+ ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
+ if (ret_code)
+ return ret_code;
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len;
+ local_fence |= info->local_fence;
+
+ set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+ header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+ LS_64((inv_stag ? I40IWQP_OP_RDMA_READ_LOC_INV : I40IWQP_OP_RDMA_READ), I40IWQPSQ_OPCODE) |
+ LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_set_fragment(wqe, 0, &op_info->lo_addr);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * i40iw_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @stag_to_inv: stag_to_inv value
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ u32 stag_to_inv,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_post_send *op_info;
+ u64 header;
+ u32 i, wqe_idx, total_size = 0, byte_off;
+ enum i40iw_status_code ret_code;
+ bool read_fence = false;
+ u8 wqe_size;
+
+ op_info = &info->op.send;
+ if (qp->max_sq_frag_cnt < op_info->num_sges)
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].len;
+ ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_sges, &wqe_size);
+ if (ret_code)
+ return ret_code;
+
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ read_fence |= info->read_fence;
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+ set_64bit_val(wqe, 16, 0);
+ header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
+ LS_64(info->op_type, I40IWQPSQ_OPCODE) |
+ LS_64((op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0),
+ I40IWQPSQ_ADDFRAGCNT) |
+ LS_64(read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_set_fragment(wqe, 0, op_info->sg_list);
+
+ for (i = 1; i < op_info->num_sges; i++) {
+ byte_off = 32 + (i - 1) * 16;
+ i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
+ }
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * i40iw_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ bool post_sq)
+{
+ u64 *wqe;
+ u8 *dest, *src;
+ struct i40iw_inline_rdma_write *op_info;
+ u64 *push;
+ u64 header = 0;
+ u32 i, wqe_idx;
+ enum i40iw_status_code ret_code;
+ bool read_fence = false;
+ u8 wqe_size;
+
+ op_info = &info->op.inline_rdma_write;
+ if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
+ return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+ ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
+ if (ret_code)
+ return ret_code;
+
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ read_fence |= info->read_fence;
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+ set_64bit_val(wqe, 16,
+ LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+
+ header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+ LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+ LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
+ LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
+ LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
+ LS_64(read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ dest = (u8 *)wqe;
+ src = (u8 *)(op_info->data);
+
+ if (op_info->len <= 16) {
+ for (i = 0; i < op_info->len; i++, src++, dest++)
+ *dest = *src;
+ } else {
+ for (i = 0; i < 16; i++, src++, dest++)
+ *dest = *src;
+ dest = (u8 *)wqe + 32;
+ for (; i < op_info->len; i++, src++, dest++)
+ *dest = *src;
+ }
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ if (qp->push_db) {
+ push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
+ memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
+ i40iw_qp_ring_push_db(qp, wqe_idx);
+ } else {
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @stag_to_inv: remote stag
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ u32 stag_to_inv,
+ bool post_sq)
+{
+ u64 *wqe;
+ u8 *dest, *src;
+ struct i40iw_post_inline_send *op_info;
+ u64 header;
+ u32 wqe_idx, i;
+ enum i40iw_status_code ret_code;
+ bool read_fence = false;
+ u8 wqe_size;
+ u64 *push;
+
+ op_info = &info->op.inline_send;
+ if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
+ return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+ ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
+ if (ret_code)
+ return ret_code;
+
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ read_fence |= info->read_fence;
+
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+ header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
+ LS_64(info->op_type, I40IWQPSQ_OPCODE) |
+ LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
+ LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
+ LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
+ LS_64(read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ dest = (u8 *)wqe;
+ src = (u8 *)(op_info->data);
+
+ if (op_info->len <= 16) {
+ for (i = 0; i < op_info->len; i++, src++, dest++)
+ *dest = *src;
+ } else {
+ for (i = 0; i < 16; i++, src++, dest++)
+ *dest = *src;
+ dest = (u8 *)wqe + 32;
+ for (; i < op_info->len; i++, src++, dest++)
+ *dest = *src;
+ }
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ if (qp->push_db) {
+ push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
+ memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
+ i40iw_qp_ring_push_db(qp, wqe_idx);
+ } else {
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_stag_local_invalidate - stag invalidate operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_inv_local_stag *op_info;
+ u64 header;
+ u32 wqe_idx;
+ bool local_fence = false;
+
+ op_info = &info->op.inv_local_stag;
+ local_fence = info->local_fence;
+
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8,
+ LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
+ set_64bit_val(wqe, 16, 0);
+ header = LS_64(I40IW_OP_TYPE_INV_STAG, I40IWQPSQ_OPCODE) |
+ LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * i40iw_mw_bind - Memory Window bind operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
+ struct i40iw_post_sq_info *info,
+ bool post_sq)
+{
+ u64 *wqe;
+ struct i40iw_bind_window *op_info;
+ u64 header;
+ u32 wqe_idx;
+ bool local_fence = false;
+
+ op_info = &info->op.bind_window;
+
+ local_fence |= info->local_fence;
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+ set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+ set_64bit_val(wqe, 8,
+ LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
+ LS_64(op_info->mw_stag, I40IWQPSQ_MWSTAG));
+ set_64bit_val(wqe, 16, op_info->bind_length);
+ header = LS_64(I40IW_OP_TYPE_BIND_MW, I40IWQPSQ_OPCODE) |
+ LS_64(((op_info->enable_reads << 2) |
+ (op_info->enable_writes << 3)),
+ I40IWQPSQ_STAGRIGHTS) |
+ LS_64((op_info->addressing_type == I40IW_ADDR_TYPE_VA_BASED ? 1 : 0),
+ I40IWQPSQ_VABASEDTO) |
+ LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * i40iw_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ */
+static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
+ struct i40iw_post_rq_info *info)
+{
+ u64 *wqe;
+ u64 header;
+ u32 total_size = 0, wqe_idx, i, byte_off;
+
+ if (qp->max_rq_frag_cnt < info->num_sges)
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+ for (i = 0; i < info->num_sges; i++)
+ total_size += info->sg_list[i].len;
+ wqe = i40iw_qp_get_next_recv_wqe(qp, &wqe_idx);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->rq_wrid_array[wqe_idx] = info->wr_id;
+ set_64bit_val(wqe, 16, 0);
+
+ header = LS_64((info->num_sges > 1 ? (info->num_sges - 1) : 0),
+ I40IWQPSQ_ADDFRAGCNT) |
+ LS_64(qp->rwqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_set_fragment(wqe, 0, info->sg_list);
+
+ for (i = 1; i < info->num_sges; i++) {
+ byte_off = 32 + (i - 1) * 16;
+ i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
+ }
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ return 0;
+}
+
+/**
+ * i40iw_cq_request_notification - cq notification request (door bell)
+ * @cq: hw cq
+ * @cq_notify: notification type
+ */
+static void i40iw_cq_request_notification(struct i40iw_cq_uk *cq,
+ enum i40iw_completion_notify cq_notify)
+{
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se = 0;
+ u8 arm_next = 0;
+ u8 arm_seq_num;
+
+ get_64bit_val(cq->shadow_area, 32, &temp_val);
+ arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
+ arm_seq_num++;
+
+ sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
+ arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
+ arm_next_se |= 1;
+ if (cq_notify == IW_CQ_COMPL_EVENT)
+ arm_next = 1;
+ temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
+ LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
+ LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
+ LS_64(arm_next, I40IW_CQ_DBSA_ARM_NEXT);
+
+ set_64bit_val(cq->shadow_area, 32, temp_val);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ writel(cq->cq_id, cq->cqe_alloc_reg);
+}
+
+/**
+ * i40iw_cq_post_entries - update tail in shadow memory
+ * @cq: hw cq
+ * @count: # of entries processed
+ */
+static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
+ u8 count)
+{
+ I40IW_RING_MOVE_TAIL_BY_COUNT(cq->cq_ring, count);
+ set_64bit_val(cq->shadow_area, 0,
+ I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
+ return 0;
+}
+
+/**
+ * i40iw_cq_poll_completion - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ * @post_cq: update cq tail
+ */
+static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
+ struct i40iw_cq_poll_info *info,
+ bool post_cq)
+{
+ u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
+ u64 *cqe, *sw_wqe;
+ struct i40iw_qp_uk *qp;
+ struct i40iw_ring *pring = NULL;
+ u32 wqe_idx, q_type, array_idx = 0;
+ enum i40iw_status_code ret_code = 0;
+ enum i40iw_status_code ret_code2 = 0;
+ bool move_cq_head = true;
+ u8 polarity;
+ u8 addl_frag_cnt, addl_wqes = 0;
+
+ if (cq->avoid_mem_cflct)
+ cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
+ else
+ cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(cq);
+
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
+
+ if (polarity != cq->polarity)
+ return I40IW_ERR_QUEUE_EMPTY;
+
+ q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
+ info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
+ info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP);
+ if (info->error) {
+ info->comp_status = I40IW_COMPL_STATUS_FLUSHED;
+ info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR);
+ info->minor_err = (bool)RS_64(qword3, I40IW_CQ_MINERR);
+ } else {
+ info->comp_status = I40IW_COMPL_STATUS_SUCCESS;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->tcp_seq_num = (u8)RS_64(qword0, I40IWCQ_TCPSEQNUM);
+
+ info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+
+ info->solicited_event = (bool)RS_64(qword3, I40IWCQ_SOEVENT);
+ info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
+
+ qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+ wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
+ info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
+
+ if (q_type == I40IW_CQE_QTYPE_RQ) {
+ array_idx = (wqe_idx * 4) / qp->rq_wqe_size_multiplier;
+ if (info->comp_status == I40IW_COMPL_STATUS_FLUSHED) {
+ info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
+ array_idx = qp->rq_ring.tail;
+ } else {
+ info->wr_id = qp->rq_wrid_array[array_idx];
+ }
+
+ info->op_type = I40IW_OP_TYPE_REC;
+ if (qword3 & I40IWCQ_STAG_MASK) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)RS_64(qword2, I40IWCQ_INVSTAG);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ info->bytes_xfered = (u32)RS_64(qword0, I40IWCQ_PAYLDLEN);
+ I40IW_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
+ pring = &qp->rq_ring;
+ } else {
+ if (info->comp_status != I40IW_COMPL_STATUS_FLUSHED) {
+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+
+ info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
+ sw_wqe = qp->sq_base[wqe_idx].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ addl_frag_cnt =
+ (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
+ i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
+
+ addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+ I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
+ } else {
+ do {
+ u8 op_type;
+ u32 tail;
+
+ tail = qp->sq_ring.tail;
+ sw_wqe = qp->sq_base[tail].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
+ info->op_type = op_type;
+ addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
+ i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
+ addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+ I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
+ if (op_type != I40IWQP_OP_NOP) {
+ info->wr_id = qp->sq_wrtrk_array[tail].wrid;
+ info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
+ break;
+ }
+ } while (1);
+ }
+ pring = &qp->sq_ring;
+ }
+
+ ret_code = 0;
+
+ if (!ret_code &&
+ (info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
+ if (pring && (I40IW_RING_MORE_WORK(*pring)))
+ move_cq_head = false;
+
+ if (move_cq_head) {
+ I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2);
+
+ if (ret_code2 && !ret_code)
+ ret_code = ret_code2;
+
+ if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
+ cq->polarity ^= 1;
+
+ if (post_cq) {
+ I40IW_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
+ }
+ } else {
+ if (info->is_srq)
+ return ret_code;
+ qword3 &= ~I40IW_CQ_WQEIDX_MASK;
+ qword3 |= LS_64(pring->tail, I40IW_CQ_WQEIDX);
+ set_64bit_val(cqe, 24, qword3);
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40iw_get_wqe_shift - get shift count for maximum wqe size
+ * @wqdepth: depth of wq required.
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the wqe size based on sge.
+ * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1
+ * (64 bytes wqes) and 2 otherwise (128 bytes wqe).
+ */
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift)
+{
+ u32 size;
+
+ *shift = 0;
+ if (sge > 1)
+ *shift = (sge < 4) ? 1 : 2;
+
+ /* check if wqdepth is multiple of 2 or not */
+
+ if ((wqdepth < I40IWQP_SW_MIN_WQSIZE) || (wqdepth & (wqdepth - 1)))
+ return I40IW_ERR_INVALID_SIZE;
+
+ size = wqdepth << *shift; /* multiple of 32 bytes count */
+ if (size > I40IWQP_SW_MAX_WQSIZE)
+ return I40IW_ERR_INVALID_SIZE;
+ return 0;
+}
+
+static struct i40iw_qp_uk_ops iw_qp_uk_ops = {
+ i40iw_qp_post_wr,
+ i40iw_qp_ring_push_db,
+ i40iw_rdma_write,
+ i40iw_rdma_read,
+ i40iw_send,
+ i40iw_inline_rdma_write,
+ i40iw_inline_send,
+ i40iw_stag_local_invalidate,
+ i40iw_mw_bind,
+ i40iw_post_receive,
+ i40iw_nop
+};
+
+static struct i40iw_cq_ops iw_cq_ops = {
+ i40iw_cq_request_notification,
+ i40iw_cq_poll_completion,
+ i40iw_cq_post_entries,
+ i40iw_clean_cq
+};
+
+static struct i40iw_device_uk_ops iw_device_uk_ops = {
+ i40iw_cq_uk_init,
+ i40iw_qp_uk_init,
+};
+
+/**
+ * i40iw_qp_uk_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on numbers of max. fragements
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for sq and rq. If srq is used,
+ * then rq_base will point to one rq wqe only (not the whole
+ * array of wqes)
+ */
+enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
+ struct i40iw_qp_uk_init_info *info)
+{
+ enum i40iw_status_code ret_code = 0;
+ u32 sq_ring_size;
+ u8 sqshift, rqshift;
+
+ if (info->max_sq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+
+ if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+ ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift);
+ if (ret_code)
+ return ret_code;
+
+ ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift);
+ if (ret_code)
+ return ret_code;
+
+ qp->sq_base = info->sq;
+ qp->rq_base = info->rq;
+ qp->shadow_area = info->shadow_area;
+ qp->sq_wrtrk_array = info->sq_wrtrk_array;
+ qp->rq_wrid_array = info->rq_wrid_array;
+
+ qp->wqe_alloc_reg = info->wqe_alloc_reg;
+ qp->qp_id = info->qp_id;
+
+ qp->sq_size = info->sq_size;
+ qp->push_db = info->push_db;
+ qp->push_wqe = info->push_wqe;
+
+ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+ sq_ring_size = qp->sq_size << sqshift;
+
+ I40IW_RING_INIT(qp->sq_ring, sq_ring_size);
+ I40IW_RING_INIT(qp->initial_ring, sq_ring_size);
+ I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ I40IW_RING_MOVE_TAIL(qp->sq_ring);
+ I40IW_RING_MOVE_HEAD(qp->initial_ring, ret_code);
+ qp->swqe_polarity = 1;
+ qp->swqe_polarity_deferred = 1;
+ qp->rwqe_polarity = 0;
+
+ if (!qp->use_srq) {
+ qp->rq_size = info->rq_size;
+ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+ qp->rq_wqe_size = rqshift;
+ I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
+ qp->rq_wqe_size_multiplier = 4 << rqshift;
+ }
+ qp->ops = iw_qp_uk_ops;
+
+ return ret_code;
+}
+
+/**
+ * i40iw_cq_uk_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ */
+enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
+ struct i40iw_cq_uk_init_info *info)
+{
+ if ((info->cq_size < I40IW_MIN_CQ_SIZE) ||
+ (info->cq_size > I40IW_MAX_CQ_SIZE))
+ return I40IW_ERR_INVALID_SIZE;
+ cq->cq_base = (struct i40iw_cqe *)info->cq_base;
+ cq->cq_id = info->cq_id;
+ cq->cq_size = info->cq_size;
+ cq->cqe_alloc_reg = info->cqe_alloc_reg;
+ cq->shadow_area = info->shadow_area;
+ cq->avoid_mem_cflct = info->avoid_mem_cflct;
+
+ I40IW_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+ cq->ops = iw_cq_ops;
+
+ return 0;
+}
+
+/**
+ * i40iw_device_init_uk - setup routines for iwarp shared device
+ * @dev: iwarp shared (user and kernel)
+ */
+void i40iw_device_init_uk(struct i40iw_dev_uk *dev)
+{
+ dev->ops_uk = iw_device_uk_ops;
+}
+
+/**
+ * i40iw_clean_cq - clean cq entries
+ * @ queue completion context
+ * @cq: cq to clean
+ */
+void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq)
+{
+ u64 *cqe;
+ u64 qword3, comp_ctx;
+ u32 cq_head;
+ u8 polarity, temp;
+
+ cq_head = cq->cq_ring.head;
+ temp = cq->polarity;
+ do {
+ if (cq->avoid_mem_cflct)
+ cqe = (u64 *)&(((struct i40iw_extended_cqe *)cq->cq_base)[cq_head]);
+ else
+ cqe = (u64 *)&cq->cq_base[cq_head];
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
+
+ if (polarity != temp)
+ break;
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if ((void *)(unsigned long)comp_ctx == queue)
+ set_64bit_val(cqe, 8, 0);
+
+ cq_head = (cq_head + 1) % cq->cq_ring.size;
+ if (!cq_head)
+ temp ^= 1;
+ } while (true);
+}
+
+/**
+ * i40iw_nop - send a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: flag if signaled for completion
+ * @post_sq: flag to post sq
+ */
+enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
+ u64 wr_id,
+ bool signaled,
+ bool post_sq)
+{
+ u64 header, *wqe;
+ u32 wqe_idx;
+
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->sq_wrtrk_array[wqe_idx].wrid = wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+ LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * i40iw_fragcnt_to_wqesize_sq - calculate wqe size based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size of sq wqe returned
+ */
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *wqe_size = I40IW_QP_WQE_MIN_SIZE;
+ break;
+ case 2:
+ case 3:
+ *wqe_size = 64;
+ break;
+ case 4:
+ case 5:
+ *wqe_size = 96;
+ break;
+ case 6:
+ case 7:
+ *wqe_size = 128;
+ break;
+ default:
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size of rq wqe returned
+ */
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *wqe_size = 32;
+ break;
+ case 2:
+ case 3:
+ *wqe_size = 64;
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ *wqe_size = 128;
+ break;
+ default:
+ return I40IW_ERR_INVALID_FRAG_COUNT;
+ }
+
+ return 0;
+}
+
+/**
+ * i40iw_inline_data_size_to_wqesize - based on inline data, wqe size
+ * @data_size: data size for inline
+ * @wqe_size: size of sq wqe returned
+ */
+enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
+ u8 *wqe_size)
+{
+ if (data_size > I40IW_MAX_INLINE_DATA_SIZE)
+ return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+ if (data_size <= 16)
+ *wqe_size = I40IW_QP_WQE_MIN_SIZE;
+ else if (data_size <= 48)
+ *wqe_size = 64;
+ else if (data_size <= 80)
+ *wqe_size = 96;
+ else
+ *wqe_size = 128;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
new file mode 100644
index 000000000000..5cd971bb8cc7
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -0,0 +1,442 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_USER_H
+#define I40IW_USER_H
+
+enum i40iw_device_capabilities_const {
+ I40IW_WQE_SIZE = 4,
+ I40IW_CQP_WQE_SIZE = 8,
+ I40IW_CQE_SIZE = 4,
+ I40IW_EXTENDED_CQE_SIZE = 8,
+ I40IW_AEQE_SIZE = 2,
+ I40IW_CEQE_SIZE = 1,
+ I40IW_CQP_CTX_SIZE = 8,
+ I40IW_SHADOW_AREA_SIZE = 8,
+ I40IW_CEQ_MAX_COUNT = 256,
+ I40IW_QUERY_FPM_BUF_SIZE = 128,
+ I40IW_COMMIT_FPM_BUF_SIZE = 128,
+ I40IW_MIN_IW_QP_ID = 1,
+ I40IW_MAX_IW_QP_ID = 262143,
+ I40IW_MIN_CEQID = 0,
+ I40IW_MAX_CEQID = 256,
+ I40IW_MIN_CQID = 0,
+ I40IW_MAX_CQID = 131071,
+ I40IW_MIN_AEQ_ENTRIES = 1,
+ I40IW_MAX_AEQ_ENTRIES = 524287,
+ I40IW_MIN_CEQ_ENTRIES = 1,
+ I40IW_MAX_CEQ_ENTRIES = 131071,
+ I40IW_MIN_CQ_SIZE = 1,
+ I40IW_MAX_CQ_SIZE = 1048575,
+ I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
+ I40IW_DB_ID_ZERO = 0,
+ I40IW_MAX_WQ_FRAGMENT_COUNT = 6,
+ I40IW_MAX_SGE_RD = 1,
+ I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
+ I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
+ I40IW_MAX_PUSH_PAGE_COUNT = 4096,
+ I40IW_MAX_PE_ENABLED_VF_COUNT = 32,
+ I40IW_MAX_VF_FPM_ID = 47,
+ I40IW_MAX_VF_PER_PF = 127,
+ I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
+ I40IW_MAX_INLINE_DATA_SIZE = 112,
+ I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 112,
+ I40IW_MAX_IRD_SIZE = 32,
+ I40IW_QPCTX_ENCD_MAXIRD = 3,
+ I40IW_MAX_WQ_ENTRIES = 2048,
+ I40IW_MAX_ORD_SIZE = 32,
+ I40IW_Q2_BUFFER_SIZE = (248 + 100),
+ I40IW_QP_CTX_SIZE = 248
+};
+
+#define i40iw_handle void *
+#define i40iw_adapter_handle i40iw_handle
+#define i40iw_qp_handle i40iw_handle
+#define i40iw_cq_handle i40iw_handle
+#define i40iw_srq_handle i40iw_handle
+#define i40iw_pd_id i40iw_handle
+#define i40iw_stag_handle i40iw_handle
+#define i40iw_stag_index u32
+#define i40iw_stag u32
+#define i40iw_stag_key u8
+
+#define i40iw_tagged_offset u64
+#define i40iw_access_privileges u32
+#define i40iw_physical_fragment u64
+#define i40iw_address_list u64 *
+
+#define I40IW_CREATE_STAG(index, key) (((index) << 8) + (key))
+
+#define I40IW_STAG_KEY_FROM_STAG(stag) ((stag) && 0x000000FF)
+
+#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8)
+
+struct i40iw_qp_uk;
+struct i40iw_cq_uk;
+struct i40iw_srq_uk;
+struct i40iw_qp_uk_init_info;
+struct i40iw_cq_uk_init_info;
+struct i40iw_srq_uk_init_info;
+
+struct i40iw_sge {
+ i40iw_tagged_offset tag_off;
+ u32 len;
+ i40iw_stag stag;
+};
+
+#define i40iw_sgl struct i40iw_sge *
+
+struct i40iw_ring {
+ u32 head;
+ u32 tail;
+ u32 size;
+};
+
+struct i40iw_cqe {
+ u64 buf[I40IW_CQE_SIZE];
+};
+
+struct i40iw_extended_cqe {
+ u64 buf[I40IW_EXTENDED_CQE_SIZE];
+};
+
+struct i40iw_wqe {
+ u64 buf[I40IW_WQE_SIZE];
+};
+
+struct i40iw_qp_uk_ops;
+
+enum i40iw_addressing_type {
+ I40IW_ADDR_TYPE_ZERO_BASED = 0,
+ I40IW_ADDR_TYPE_VA_BASED = 1,
+};
+
+#define I40IW_ACCESS_FLAGS_LOCALREAD 0x01
+#define I40IW_ACCESS_FLAGS_LOCALWRITE 0x02
+#define I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
+#define I40IW_ACCESS_FLAGS_REMOTEREAD 0x05
+#define I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
+#define I40IW_ACCESS_FLAGS_REMOTEWRITE 0x0a
+#define I40IW_ACCESS_FLAGS_BIND_WINDOW 0x10
+#define I40IW_ACCESS_FLAGS_ALL 0x1F
+
+#define I40IW_OP_TYPE_RDMA_WRITE 0
+#define I40IW_OP_TYPE_RDMA_READ 1
+#define I40IW_OP_TYPE_SEND 3
+#define I40IW_OP_TYPE_SEND_INV 4
+#define I40IW_OP_TYPE_SEND_SOL 5
+#define I40IW_OP_TYPE_SEND_SOL_INV 6
+#define I40IW_OP_TYPE_REC 7
+#define I40IW_OP_TYPE_BIND_MW 8
+#define I40IW_OP_TYPE_FAST_REG_NSMR 9
+#define I40IW_OP_TYPE_INV_STAG 10
+#define I40IW_OP_TYPE_RDMA_READ_INV_STAG 11
+#define I40IW_OP_TYPE_NOP 12
+
+enum i40iw_completion_status {
+ I40IW_COMPL_STATUS_SUCCESS = 0,
+ I40IW_COMPL_STATUS_FLUSHED,
+ I40IW_COMPL_STATUS_INVALID_WQE,
+ I40IW_COMPL_STATUS_QP_CATASTROPHIC,
+ I40IW_COMPL_STATUS_REMOTE_TERMINATION,
+ I40IW_COMPL_STATUS_INVALID_STAG,
+ I40IW_COMPL_STATUS_BASE_BOUND_VIOLATION,
+ I40IW_COMPL_STATUS_ACCESS_VIOLATION,
+ I40IW_COMPL_STATUS_INVALID_PD_ID,
+ I40IW_COMPL_STATUS_WRAP_ERROR,
+ I40IW_COMPL_STATUS_STAG_INVALID_PDID,
+ I40IW_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+ I40IW_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+ I40IW_COMPL_STATUS_STAG_NOT_INVALID,
+ I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_SIZE,
+ I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_ENTRY,
+ I40IW_COMPL_STATUS_INVALID_FBO,
+ I40IW_COMPL_STATUS_INVALID_LENGTH,
+ I40IW_COMPL_STATUS_INVALID_ACCESS,
+ I40IW_COMPL_STATUS_PHYS_BUFFER_LIST_TOO_LONG,
+ I40IW_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+ I40IW_COMPL_STATUS_INVALID_REGION,
+ I40IW_COMPL_STATUS_INVALID_WINDOW,
+ I40IW_COMPL_STATUS_INVALID_TOTAL_LENGTH
+};
+
+enum i40iw_completion_notify {
+ IW_CQ_COMPL_EVENT = 0,
+ IW_CQ_COMPL_SOLICITED = 1
+};
+
+struct i40iw_post_send {
+ i40iw_sgl sg_list;
+ u8 num_sges;
+};
+
+struct i40iw_post_inline_send {
+ void *data;
+ u32 len;
+};
+
+struct i40iw_post_send_w_inv {
+ i40iw_sgl sg_list;
+ u32 num_sges;
+ i40iw_stag remote_stag_to_inv;
+};
+
+struct i40iw_post_inline_send_w_inv {
+ void *data;
+ u32 len;
+ i40iw_stag remote_stag_to_inv;
+};
+
+struct i40iw_rdma_write {
+ i40iw_sgl lo_sg_list;
+ u8 num_lo_sges;
+ struct i40iw_sge rem_addr;
+};
+
+struct i40iw_inline_rdma_write {
+ void *data;
+ u32 len;
+ struct i40iw_sge rem_addr;
+};
+
+struct i40iw_rdma_read {
+ struct i40iw_sge lo_addr;
+ struct i40iw_sge rem_addr;
+};
+
+struct i40iw_bind_window {
+ i40iw_stag mr_stag;
+ u64 bind_length;
+ void *va;
+ enum i40iw_addressing_type addressing_type;
+ bool enable_reads;
+ bool enable_writes;
+ i40iw_stag mw_stag;
+};
+
+struct i40iw_inv_local_stag {
+ i40iw_stag target_stag;
+};
+
+struct i40iw_post_sq_info {
+ u64 wr_id;
+ u8 op_type;
+ bool signaled;
+ bool read_fence;
+ bool local_fence;
+ bool inline_data;
+ bool defer_flag;
+ union {
+ struct i40iw_post_send send;
+ struct i40iw_post_send send_w_sol;
+ struct i40iw_post_send_w_inv send_w_inv;
+ struct i40iw_post_send_w_inv send_w_sol_inv;
+ struct i40iw_rdma_write rdma_write;
+ struct i40iw_rdma_read rdma_read;
+ struct i40iw_rdma_read rdma_read_inv;
+ struct i40iw_bind_window bind_window;
+ struct i40iw_inv_local_stag inv_local_stag;
+ struct i40iw_inline_rdma_write inline_rdma_write;
+ struct i40iw_post_inline_send inline_send;
+ struct i40iw_post_inline_send inline_send_w_sol;
+ struct i40iw_post_inline_send_w_inv inline_send_w_inv;
+ struct i40iw_post_inline_send_w_inv inline_send_w_sol_inv;
+ } op;
+};
+
+struct i40iw_post_rq_info {
+ u64 wr_id;
+ i40iw_sgl sg_list;
+ u32 num_sges;
+};
+
+struct i40iw_cq_poll_info {
+ u64 wr_id;
+ i40iw_qp_handle qp_handle;
+ u32 bytes_xfered;
+ u32 tcp_seq_num;
+ u32 qp_id;
+ i40iw_stag inv_stag;
+ enum i40iw_completion_status comp_status;
+ u16 major_err;
+ u16 minor_err;
+ u8 op_type;
+ bool stag_invalid_set;
+ bool push_dropped;
+ bool error;
+ bool is_srq;
+ bool solicited_event;
+};
+
+struct i40iw_qp_uk_ops {
+ void (*iw_qp_post_wr)(struct i40iw_qp_uk *);
+ void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32);
+ enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, bool);
+ enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, bool, bool);
+ enum i40iw_status_code (*iw_send)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, u32, bool);
+ enum i40iw_status_code (*iw_inline_rdma_write)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, bool);
+ enum i40iw_status_code (*iw_inline_send)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, u32, bool);
+ enum i40iw_status_code (*iw_stag_local_invalidate)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, bool);
+ enum i40iw_status_code (*iw_mw_bind)(struct i40iw_qp_uk *,
+ struct i40iw_post_sq_info *, bool);
+ enum i40iw_status_code (*iw_post_receive)(struct i40iw_qp_uk *,
+ struct i40iw_post_rq_info *);
+ enum i40iw_status_code (*iw_post_nop)(struct i40iw_qp_uk *, u64, bool, bool);
+};
+
+struct i40iw_cq_ops {
+ void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
+ enum i40iw_completion_notify);
+ enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
+ struct i40iw_cq_poll_info *, bool);
+ enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
+ void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
+};
+
+struct i40iw_dev_uk;
+
+struct i40iw_device_uk_ops {
+ enum i40iw_status_code (*iwarp_cq_uk_init)(struct i40iw_cq_uk *,
+ struct i40iw_cq_uk_init_info *);
+ enum i40iw_status_code (*iwarp_qp_uk_init)(struct i40iw_qp_uk *,
+ struct i40iw_qp_uk_init_info *);
+};
+
+struct i40iw_dev_uk {
+ struct i40iw_device_uk_ops ops_uk;
+};
+
+struct i40iw_sq_uk_wr_trk_info {
+ u64 wrid;
+ u64 wr_len;
+};
+
+struct i40iw_qp_quanta {
+ u64 elem[I40IW_WQE_SIZE];
+};
+
+struct i40iw_qp_uk {
+ struct i40iw_qp_quanta *sq_base;
+ struct i40iw_qp_quanta *rq_base;
+ u32 __iomem *wqe_alloc_reg;
+ struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u64 *shadow_area;
+ u32 *push_db;
+ u64 *push_wqe;
+ struct i40iw_ring sq_ring;
+ struct i40iw_ring rq_ring;
+ struct i40iw_ring initial_ring;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ struct i40iw_qp_uk_ops ops;
+ bool use_srq;
+ u8 swqe_polarity;
+ u8 swqe_polarity_deferred;
+ u8 rwqe_polarity;
+ u8 rq_wqe_size;
+ u8 rq_wqe_size_multiplier;
+ u8 max_sq_frag_cnt;
+ u8 max_rq_frag_cnt;
+ bool deferred_flag;
+};
+
+struct i40iw_cq_uk {
+ struct i40iw_cqe *cq_base;
+ u32 __iomem *cqe_alloc_reg;
+ u64 *shadow_area;
+ u32 cq_id;
+ u32 cq_size;
+ struct i40iw_ring cq_ring;
+ u8 polarity;
+ bool avoid_mem_cflct;
+
+ struct i40iw_cq_ops ops;
+};
+
+struct i40iw_qp_uk_init_info {
+ struct i40iw_qp_quanta *sq;
+ struct i40iw_qp_quanta *rq;
+ u32 __iomem *wqe_alloc_reg;
+ u64 *shadow_area;
+ struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 *push_db;
+ u64 *push_wqe;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u8 max_sq_frag_cnt;
+ u8 max_rq_frag_cnt;
+
+};
+
+struct i40iw_cq_uk_init_info {
+ u32 __iomem *cqe_alloc_reg;
+ struct i40iw_cqe *cq_base;
+ u64 *shadow_area;
+ u32 cq_size;
+ u32 cq_id;
+ bool avoid_mem_cflct;
+};
+
+void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
+
+void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
+u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
+ u8 wqe_size);
+u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
+u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
+
+enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
+ struct i40iw_cq_uk_init_info *info);
+enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
+ struct i40iw_qp_uk_init_info *info);
+
+void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
+enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
+ bool signaled, bool post_sq);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
+ u8 *wqe_size);
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift);
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
new file mode 100644
index 000000000000..1ceec81bd8eb
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -0,0 +1,1270 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include "i40iw.h"
+
+/**
+ * i40iw_arp_table - manage arp table
+ * @iwdev: iwarp device
+ * @ip_addr: ip address for device
+ * @mac_addr: mac address ptr
+ * @action: modify, delete or add
+ */
+int i40iw_arp_table(struct i40iw_device *iwdev,
+ __be32 *ip_addr,
+ bool ipv4,
+ u8 *mac_addr,
+ u32 action)
+{
+ int arp_index;
+ int err;
+ u32 ip[4];
+
+ if (ipv4) {
+ memset(ip, 0, sizeof(ip));
+ ip[0] = *ip_addr;
+ } else {
+ memcpy(ip, ip_addr, sizeof(ip));
+ }
+
+ for (arp_index = 0; (u32)arp_index < iwdev->arp_table_size; arp_index++)
+ if (memcmp(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip)) == 0)
+ break;
+ switch (action) {
+ case I40IW_ARP_ADD:
+ if (arp_index != iwdev->arp_table_size)
+ return -1;
+
+ arp_index = 0;
+ err = i40iw_alloc_resource(iwdev, iwdev->allocated_arps,
+ iwdev->arp_table_size,
+ (u32 *)&arp_index,
+ &iwdev->next_arp_index);
+
+ if (err)
+ return err;
+
+ memcpy(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip));
+ ether_addr_copy(iwdev->arp_table[arp_index].mac_addr, mac_addr);
+ break;
+ case I40IW_ARP_RESOLVE:
+ if (arp_index == iwdev->arp_table_size)
+ return -1;
+ break;
+ case I40IW_ARP_DELETE:
+ if (arp_index == iwdev->arp_table_size)
+ return -1;
+ memset(iwdev->arp_table[arp_index].ip_addr, 0,
+ sizeof(iwdev->arp_table[arp_index].ip_addr));
+ eth_zero_addr(iwdev->arp_table[arp_index].mac_addr);
+ i40iw_free_resource(iwdev, iwdev->allocated_arps, arp_index);
+ break;
+ default:
+ return -1;
+ }
+ return arp_index;
+}
+
+/**
+ * i40iw_wr32 - write 32 bits to hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ * @value: vvalue to write to register
+ */
+inline void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value)
+{
+ writel(value, hw->hw_addr + reg);
+}
+
+/**
+ * i40iw_rd32 - read a 32 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
+{
+ return readl(hw->hw_addr + reg);
+}
+
+/**
+ * i40iw_inetaddr_event - system notifier for netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int i40iw_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *event_netdev = ifa->ifa_dev->dev;
+ struct net_device *netdev;
+ struct net_device *upper_dev;
+ struct i40iw_device *iwdev;
+ struct i40iw_handler *hdl;
+ __be32 local_ipaddr;
+
+ hdl = i40iw_find_netdev(event_netdev);
+ if (!hdl)
+ return NOTIFY_DONE;
+
+ iwdev = &hdl->device;
+ netdev = iwdev->ldev->netdev;
+ upper_dev = netdev_master_upper_dev_get(netdev);
+ if (netdev != event_netdev)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ if (upper_dev)
+ local_ipaddr =
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ else
+ local_ipaddr = ifa->ifa_address;
+ local_ipaddr = ntohl(local_ipaddr);
+ i40iw_manage_arp_cache(iwdev,
+ netdev->dev_addr,
+ &local_ipaddr,
+ true,
+ I40IW_ARP_DELETE);
+ return NOTIFY_OK;
+ case NETDEV_UP:
+ if (upper_dev)
+ local_ipaddr =
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ else
+ local_ipaddr = ifa->ifa_address;
+ local_ipaddr = ntohl(local_ipaddr);
+ i40iw_manage_arp_cache(iwdev,
+ netdev->dev_addr,
+ &local_ipaddr,
+ true,
+ I40IW_ARP_ADD);
+ break;
+ case NETDEV_CHANGEADDR:
+ /* Add the address to the IP table */
+ if (upper_dev)
+ local_ipaddr =
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ else
+ local_ipaddr = ifa->ifa_address;
+
+ local_ipaddr = ntohl(local_ipaddr);
+ i40iw_manage_arp_cache(iwdev,
+ netdev->dev_addr,
+ &local_ipaddr,
+ true,
+ I40IW_ARP_ADD);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_inet6addr_event - system notifier for ipv6 netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int i40iw_inet6addr_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ struct net_device *event_netdev = ifa->idev->dev;
+ struct net_device *netdev;
+ struct i40iw_device *iwdev;
+ struct i40iw_handler *hdl;
+ __be32 local_ipaddr6[4];
+
+ hdl = i40iw_find_netdev(event_netdev);
+ if (!hdl)
+ return NOTIFY_DONE;
+
+ iwdev = &hdl->device;
+ netdev = iwdev->ldev->netdev;
+ if (netdev != event_netdev)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+ i40iw_manage_arp_cache(iwdev,
+ netdev->dev_addr,
+ local_ipaddr6,
+ false,
+ I40IW_ARP_DELETE);
+ return NOTIFY_OK;
+ case NETDEV_UP:
+ /* Fall through */
+ case NETDEV_CHANGEADDR:
+ i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+ i40iw_manage_arp_cache(iwdev,
+ netdev->dev_addr,
+ local_ipaddr6,
+ false,
+ I40IW_ARP_ADD);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_net_event - system notifier for net events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: neighbor
+ */
+int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *ptr)
+{
+ struct neighbour *neigh = ptr;
+ struct i40iw_device *iwdev;
+ struct i40iw_handler *iwhdl;
+ __be32 *p;
+ u32 local_ipaddr[4];
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ iwhdl = i40iw_find_netdev((struct net_device *)neigh->dev);
+ if (!iwhdl)
+ return NOTIFY_DONE;
+ iwdev = &iwhdl->device;
+ p = (__be32 *)neigh->primary_key;
+ i40iw_copy_ip_ntohl(local_ipaddr, p);
+ if (neigh->nud_state & NUD_VALID) {
+ i40iw_manage_arp_cache(iwdev,
+ neigh->ha,
+ local_ipaddr,
+ false,
+ I40IW_ARP_ADD);
+
+ } else {
+ i40iw_manage_arp_cache(iwdev,
+ neigh->ha,
+ local_ipaddr,
+ false,
+ I40IW_ARP_DELETE);
+ }
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_get_cqp_request - get cqp struct
+ * @cqp: device cqp ptr
+ * @wait: cqp to be used in wait mode
+ */
+struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait)
+{
+ struct i40iw_cqp_request *cqp_request = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cqp->req_lock, flags);
+ if (!list_empty(&cqp->cqp_avail_reqs)) {
+ cqp_request = list_entry(cqp->cqp_avail_reqs.next,
+ struct i40iw_cqp_request, list);
+ list_del_init(&cqp_request->list);
+ }
+ spin_unlock_irqrestore(&cqp->req_lock, flags);
+ if (!cqp_request) {
+ cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
+ if (cqp_request) {
+ cqp_request->dynamic = true;
+ INIT_LIST_HEAD(&cqp_request->list);
+ init_waitqueue_head(&cqp_request->waitq);
+ }
+ }
+ if (!cqp_request) {
+ i40iw_pr_err("CQP Request Fail: No Memory");
+ return NULL;
+ }
+
+ if (wait) {
+ atomic_set(&cqp_request->refcount, 2);
+ cqp_request->waiting = true;
+ } else {
+ atomic_set(&cqp_request->refcount, 1);
+ }
+ return cqp_request;
+}
+
+/**
+ * i40iw_free_cqp_request - free cqp request
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
+{
+ unsigned long flags;
+
+ if (cqp_request->dynamic) {
+ kfree(cqp_request);
+ } else {
+ cqp_request->request_done = false;
+ cqp_request->callback_fcn = NULL;
+ cqp_request->waiting = false;
+
+ spin_lock_irqsave(&cqp->req_lock, flags);
+ list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
+ spin_unlock_irqrestore(&cqp->req_lock, flags);
+ }
+}
+
+/**
+ * i40iw_put_cqp_request - dec ref count and free if 0
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
+ struct i40iw_cqp_request *cqp_request)
+{
+ if (atomic_dec_and_test(&cqp_request->refcount))
+ i40iw_free_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * i40iw_free_qp - callback after destroy cqp completes
+ * @cqp_request: cqp request for destroy qp
+ * @num: not used
+ */
+static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
+{
+ struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
+ struct i40iw_device *iwdev;
+ u32 qp_num = iwqp->ibqp.qp_num;
+
+ iwdev = iwqp->iwdev;
+
+ i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+ i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+}
+
+/**
+ * i40iw_wait_event - wait for completion
+ * @iwdev: iwarp device
+ * @cqp_request: cqp request to wait
+ */
+static int i40iw_wait_event(struct i40iw_device *iwdev,
+ struct i40iw_cqp_request *cqp_request)
+{
+ struct cqp_commands_info *info = &cqp_request->info;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ bool cqp_error = false;
+ int err_code = 0;
+ int timeout_ret = 0;
+
+ timeout_ret = wait_event_timeout(cqp_request->waitq,
+ cqp_request->request_done,
+ I40IW_EVENT_TIMEOUT);
+ if (!timeout_ret) {
+ i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
+ info->cqp_cmd, timeout_ret);
+ err_code = -ETIME;
+ i40iw_request_reset(iwdev);
+ goto done;
+ }
+ cqp_error = cqp_request->compl_info.error;
+ if (cqp_error) {
+ i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n",
+ info->cqp_cmd, cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code);
+ err_code = -EPROTO;
+ goto done;
+ }
+done:
+ i40iw_put_cqp_request(iwcqp, cqp_request);
+ return err_code;
+}
+
+/**
+ * i40iw_handle_cqp_op - process cqp command
+ * @iwdev: iwarp device
+ * @cqp_request: cqp request to process
+ */
+enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
+ struct i40iw_cqp_request
+ *cqp_request)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ enum i40iw_status_code status;
+ struct cqp_commands_info *info = &cqp_request->info;
+ int err_code = 0;
+
+ status = i40iw_process_cqp_cmd(dev, info);
+ if (status) {
+ i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
+ i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
+ return status;
+ }
+ if (cqp_request->waiting)
+ err_code = i40iw_wait_event(iwdev, cqp_request);
+ if (err_code)
+ status = I40IW_ERR_CQP_COMPL_ERROR;
+ return status;
+}
+
+/**
+ * i40iw_add_pdusecount - add pd refcount
+ * @iwpd: pd for refcount
+ */
+void i40iw_add_pdusecount(struct i40iw_pd *iwpd)
+{
+ atomic_inc(&iwpd->usecount);
+}
+
+/**
+ * i40iw_rem_pdusecount - decrement refcount for pd and free if 0
+ * @iwpd: pd for refcount
+ * @iwdev: iwarp device
+ */
+void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
+{
+ if (!atomic_dec_and_test(&iwpd->usecount))
+ return;
+ i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
+ kfree(iwpd);
+}
+
+/**
+ * i40iw_add_ref - add refcount for qp
+ * @ibqp: iqarp qp
+ */
+void i40iw_add_ref(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
+
+ atomic_inc(&iwqp->refcount);
+}
+
+/**
+ * i40iw_rem_ref - rem refcount for qp and free if 0
+ * @ibqp: iqarp qp
+ */
+void i40iw_rem_ref(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp;
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_device *iwdev;
+ u32 qp_num;
+
+ iwqp = to_iwqp(ibqp);
+ if (!atomic_dec_and_test(&iwqp->refcount))
+ return;
+
+ iwdev = iwqp->iwdev;
+ qp_num = iwqp->ibqp.qp_num;
+ iwdev->qp_table[qp_num] = NULL;
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_request->callback_fcn = i40iw_free_qp;
+ cqp_request->param = (void *)&iwqp->sc_qp;
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_QP_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Destroy QP fail");
+}
+
+/**
+ * i40iw_get_qp - get qp address
+ * @device: iwarp device
+ * @qpn: qp number
+ */
+struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn)
+{
+ struct i40iw_device *iwdev = to_iwdev(device);
+
+ if ((qpn < IW_FIRST_QPN) || (qpn >= iwdev->max_qp))
+ return NULL;
+
+ return &iwdev->qp_table[qpn]->ibqp;
+}
+
+/**
+ * i40iw_debug_buf - print debug msg and buffer is mask set
+ * @dev: hardware control device structure
+ * @mask: mask to compare if to print debug buffer
+ * @buf: points buffer addr
+ * @size: saize of buffer to print
+ */
+void i40iw_debug_buf(struct i40iw_sc_dev *dev,
+ enum i40iw_debug_flag mask,
+ char *desc,
+ u64 *buf,
+ u32 size)
+{
+ u32 i;
+
+ if (!(dev->debug_mask & mask))
+ return;
+ i40iw_debug(dev, mask, "%s\n", desc);
+ i40iw_debug(dev, mask, "starting address virt=%p phy=%llxh\n", buf,
+ (unsigned long long)virt_to_phys(buf));
+
+ for (i = 0; i < size; i += 8)
+ i40iw_debug(dev, mask, "index %03d val: %016llx\n", i, buf[i / 8]);
+}
+
+/**
+ * i40iw_get_hw_addr - return hw addr
+ * @par: points to shared dev
+ */
+u8 __iomem *i40iw_get_hw_addr(void *par)
+{
+ struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)par;
+
+ return dev->hw->hw_addr;
+}
+
+/**
+ * i40iw_remove_head - return head entry and remove from list
+ * @list: list for entry
+ */
+void *i40iw_remove_head(struct list_head *list)
+{
+ struct list_head *entry;
+
+ if (list_empty(list))
+ return NULL;
+
+ entry = (void *)list->next;
+ list_del(entry);
+ return (void *)entry;
+}
+
+/**
+ * i40iw_allocate_dma_mem - Memory alloc helper fn
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ */
+enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
+ struct i40iw_dma_mem *mem,
+ u64 size,
+ u32 alignment)
+{
+ struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+ if (!mem)
+ return I40IW_ERR_PARAM;
+ mem->size = ALIGN(size, alignment);
+ mem->va = dma_zalloc_coherent(&pcidev->dev, mem->size,
+ (dma_addr_t *)&mem->pa, GFP_KERNEL);
+ if (!mem->va)
+ return I40IW_ERR_NO_MEMORY;
+ return 0;
+}
+
+/**
+ * i40iw_free_dma_mem - Memory free helper fn
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to free
+ */
+void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
+{
+ struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+ if (!mem || !mem->va)
+ return;
+
+ dma_free_coherent(&pcidev->dev, mem->size,
+ mem->va, (dma_addr_t)mem->pa);
+ mem->va = NULL;
+}
+
+/**
+ * i40iw_allocate_virt_mem - virtual memory alloc helper fn
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ */
+enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
+ struct i40iw_virt_mem *mem,
+ u32 size)
+{
+ if (!mem)
+ return I40IW_ERR_PARAM;
+
+ mem->size = size;
+ mem->va = kzalloc(size, GFP_KERNEL);
+
+ if (mem->va)
+ return 0;
+ else
+ return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * i40iw_free_virt_mem - virtual memory free helper fn
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to free
+ */
+enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+ struct i40iw_virt_mem *mem)
+{
+ if (!mem)
+ return I40IW_ERR_PARAM;
+ kfree(mem->va);
+ mem->va = NULL;
+ return 0;
+}
+
+/**
+ * i40iw_cqp_sds_cmd - create cqp command for sd
+ * @dev: hardware control device structure
+ * @sd_info: information for sd cqp
+ *
+ */
+enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_update_sds_info *sdinfo)
+{
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+ cqp_info = &cqp_request->info;
+ memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
+ sizeof(cqp_info->in.u.update_pe_sds.info));
+ cqp_info->cqp_cmd = OP_UPDATE_PE_SDS;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.update_pe_sds.dev = dev;
+ cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Update SD's fail");
+ return status;
+}
+
+/**
+ * i40iw_term_modify_qp - modify qp for term message
+ * @qp: hardware control qp
+ * @next_state: qp's next state
+ * @term: terminate code
+ * @term_len: length
+ */
+void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len)
+{
+ struct i40iw_qp *iwqp;
+
+ iwqp = (struct i40iw_qp *)qp->back_qp;
+ i40iw_next_iw_state(iwqp, next_state, 0, term, term_len);
+};
+
+/**
+ * i40iw_terminate_done - after terminate is completed
+ * @qp: hardware control qp
+ * @timeout_occurred: indicates if terminate timer expired
+ */
+void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred)
+{
+ struct i40iw_qp *iwqp;
+ u32 next_iwarp_state = I40IW_QP_STATE_ERROR;
+ u8 hte = 0;
+ bool first_time;
+ unsigned long flags;
+
+ iwqp = (struct i40iw_qp *)qp->back_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->hte_added) {
+ iwqp->hte_added = 0;
+ hte = 1;
+ }
+ first_time = !(qp->term_flags & I40IW_TERM_DONE);
+ qp->term_flags |= I40IW_TERM_DONE;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (first_time) {
+ if (!timeout_occurred)
+ i40iw_terminate_del_timer(qp);
+ else
+ next_iwarp_state = I40IW_QP_STATE_CLOSING;
+
+ i40iw_next_iw_state(iwqp, next_iwarp_state, hte, 0, 0);
+ i40iw_cm_disconn(iwqp);
+ }
+}
+
+/**
+ * i40iw_terminate_imeout - timeout happened
+ * @context: points to iwarp qp
+ */
+static void i40iw_terminate_timeout(unsigned long context)
+{
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)context;
+ struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
+
+ i40iw_terminate_done(qp, 1);
+}
+
+/**
+ * i40iw_terminate_start_timer - start terminate timeout
+ * @qp: hardware control qp
+ */
+void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
+{
+ struct i40iw_qp *iwqp;
+
+ iwqp = (struct i40iw_qp *)qp->back_qp;
+ init_timer(&iwqp->terminate_timer);
+ iwqp->terminate_timer.function = i40iw_terminate_timeout;
+ iwqp->terminate_timer.expires = jiffies + HZ;
+ iwqp->terminate_timer.data = (unsigned long)iwqp;
+ add_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * i40iw_terminate_del_timer - delete terminate timeout
+ * @qp: hardware control qp
+ */
+void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
+{
+ struct i40iw_qp *iwqp;
+
+ iwqp = (struct i40iw_qp *)qp->back_qp;
+ del_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * i40iw_cqp_generic_worker - generic worker for cqp
+ * @work: work pointer
+ */
+static void i40iw_cqp_generic_worker(struct work_struct *work)
+{
+ struct i40iw_virtchnl_work_info *work_info =
+ &((struct virtchnl_work *)work)->work_info;
+
+ if (work_info->worker_vf_dev)
+ work_info->callback_fcn(work_info->worker_vf_dev);
+}
+
+/**
+ * i40iw_cqp_spawn_worker - spawn worket thread
+ * @iwdev: device struct pointer
+ * @work_info: work request info
+ * @iw_vf_idx: virtual function index
+ */
+void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_work_info *work_info,
+ u32 iw_vf_idx)
+{
+ struct virtchnl_work *work;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ work = &iwdev->virtchnl_w[iw_vf_idx];
+ memcpy(&work->work_info, work_info, sizeof(*work_info));
+ INIT_WORK(&work->work, i40iw_cqp_generic_worker);
+ queue_work(iwdev->virtchnl_wq, &work->work);
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_worker -
+ * @work: work pointer for hmc info
+ */
+static void i40iw_cqp_manage_hmc_fcn_worker(struct work_struct *work)
+{
+ struct i40iw_cqp_request *cqp_request =
+ ((struct virtchnl_work *)work)->cqp_request;
+ struct i40iw_ccq_cqe_info ccq_cqe_info;
+ struct i40iw_hmc_fcn_info *hmcfcninfo =
+ &cqp_request->info.in.u.manage_hmc_pm.info;
+ struct i40iw_device *iwdev =
+ (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->back_dev;
+
+ ccq_cqe_info.cqp = NULL;
+ ccq_cqe_info.maj_err_code = cqp_request->compl_info.maj_err_code;
+ ccq_cqe_info.min_err_code = cqp_request->compl_info.min_err_code;
+ ccq_cqe_info.op_code = cqp_request->compl_info.op_code;
+ ccq_cqe_info.op_ret_val = cqp_request->compl_info.op_ret_val;
+ ccq_cqe_info.scratch = 0;
+ ccq_cqe_info.error = cqp_request->compl_info.error;
+ hmcfcninfo->callback_fcn(cqp_request->info.in.u.manage_hmc_pm.dev,
+ hmcfcninfo->cqp_callback_param, &ccq_cqe_info);
+ i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_callback - called function after cqp completion
+ * @cqp_request: cqp request info struct for hmc fun
+ * @unused: unused param of callback
+ */
+static void i40iw_cqp_manage_hmc_fcn_callback(struct i40iw_cqp_request *cqp_request,
+ u32 unused)
+{
+ struct virtchnl_work *work;
+ struct i40iw_hmc_fcn_info *hmcfcninfo =
+ &cqp_request->info.in.u.manage_hmc_pm.info;
+ struct i40iw_device *iwdev =
+ (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->
+ back_dev;
+
+ if (hmcfcninfo && hmcfcninfo->callback_fcn) {
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s1\n", __func__);
+ atomic_inc(&cqp_request->refcount);
+ work = &iwdev->virtchnl_w[hmcfcninfo->iw_vf_idx];
+ work->cqp_request = cqp_request;
+ INIT_WORK(&work->work, i40iw_cqp_manage_hmc_fcn_worker);
+ queue_work(iwdev->virtchnl_wq, &work->work);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s2\n", __func__);
+ } else {
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s: Something wrong\n", __func__);
+ }
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_cmd - issue cqp command to manage hmc
+ * @dev: hardware control device structure
+ * @hmcfcninfo: info for hmc
+ */
+enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_hmc_fcn_info *hmcfcninfo)
+{
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s\n", __func__);
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+ cqp_info = &cqp_request->info;
+ cqp_request->callback_fcn = i40iw_cqp_manage_hmc_fcn_callback;
+ cqp_request->param = hmcfcninfo;
+ memcpy(&cqp_info->in.u.manage_hmc_pm.info, hmcfcninfo,
+ sizeof(*hmcfcninfo));
+ cqp_info->in.u.manage_hmc_pm.dev = dev;
+ cqp_info->cqp_cmd = OP_MANAGE_HMC_PM_FUNC_TABLE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_hmc_pm.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Manage HMC fail");
+ return status;
+}
+
+/**
+ * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm
+ * @iwdev: function device struct
+ * @values_mem: buffer for fpm
+ * @hmc_fn_id: function id for fpm
+ */
+enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_dma_mem *values_mem,
+ u8 hmc_fn_id)
+{
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+ cqp_info = &cqp_request->info;
+ cqp_request->param = NULL;
+ cqp_info->in.u.query_fpm_values.cqp = dev->cqp;
+ cqp_info->in.u.query_fpm_values.fpm_values_pa = values_mem->pa;
+ cqp_info->in.u.query_fpm_values.fpm_values_va = values_mem->va;
+ cqp_info->in.u.query_fpm_values.hmc_fn_id = hmc_fn_id;
+ cqp_info->cqp_cmd = OP_QUERY_FPM_VALUES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.query_fpm_values.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Query FPM fail");
+ return status;
+}
+
+/**
+ * i40iw_cqp_commit_fpm_values_cmd - commit fpm values in hw
+ * @dev: hardware control device structure
+ * @values_mem: buffer with fpm values
+ * @hmc_fn_id: function id for fpm
+ */
+enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_dma_mem *values_mem,
+ u8 hmc_fn_id)
+{
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+ cqp_info = &cqp_request->info;
+ cqp_request->param = NULL;
+ cqp_info->in.u.commit_fpm_values.cqp = dev->cqp;
+ cqp_info->in.u.commit_fpm_values.fpm_values_pa = values_mem->pa;
+ cqp_info->in.u.commit_fpm_values.fpm_values_va = values_mem->va;
+ cqp_info->in.u.commit_fpm_values.hmc_fn_id = hmc_fn_id;
+ cqp_info->cqp_cmd = OP_COMMIT_FPM_VALUES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.commit_fpm_values.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Commit FPM fail");
+ return status;
+}
+
+/**
+ * i40iw_vf_wait_vchnl_resp - wait for channel msg
+ * @iwdev: function's device struct
+ */
+enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
+{
+ struct i40iw_device *iwdev = dev->back_dev;
+ enum i40iw_status_code err_code = 0;
+ int timeout_ret;
+
+ i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
+ __func__, __LINE__, dev, iwdev);
+ atomic_add(2, &iwdev->vchnl_msgs);
+ timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
+ (atomic_read(&iwdev->vchnl_msgs) == 1),
+ I40IW_VCHNL_EVENT_TIMEOUT);
+ atomic_dec(&iwdev->vchnl_msgs);
+ if (!timeout_ret) {
+ i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
+ err_code = I40IW_ERR_TIMEOUT;
+ }
+ return err_code;
+}
+
+/**
+ * i40iw_ieq_mpa_crc_ae - generate AE for crc error
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_qp_flush_info info;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__);
+ memset(&info, 0, sizeof(info));
+ info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR;
+ info.generate_ae = true;
+ info.ae_source = 0x3;
+ (void)i40iw_hw_flush_wqes(iwdev, qp, &info, false);
+}
+
+/**
+ * i40iw_init_hash_desc - initialize hash for crc calculation
+ * @desc: cryption type
+ */
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *tdesc;
+
+ tfm = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(tfm))
+ return I40IW_ERR_MPA_CRC;
+
+ tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
+ GFP_KERNEL);
+ if (!tdesc) {
+ crypto_free_shash(tfm);
+ return I40IW_ERR_MPA_CRC;
+ }
+ tdesc->tfm = tfm;
+ *desc = tdesc;
+
+ return 0;
+}
+
+/**
+ * i40iw_free_hash_desc - free hash desc
+ * @desc: to be freed
+ */
+void i40iw_free_hash_desc(struct shash_desc *desc)
+{
+ if (desc) {
+ crypto_free_shash(desc->tfm);
+ kfree(desc);
+ }
+}
+
+/**
+ * i40iw_alloc_query_fpm_buf - allocate buffer for fpm
+ * @dev: hardware control device structure
+ * @mem: buffer ptr for fpm to be allocated
+ * @return: memory allocation status
+ */
+enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
+ struct i40iw_dma_mem *mem)
+{
+ enum i40iw_status_code status;
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ status = i40iw_obj_aligned_mem(iwdev, mem, I40IW_QUERY_FPM_BUF_SIZE,
+ I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
+ return status;
+}
+
+/**
+ * i40iw_ieq_check_mpacrc - check if mpa crc is OK
+ * @desc: desc for hash
+ * @addr: address of buffer for crc
+ * @length: length of buffer
+ * @value: value to be compared
+ */
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
+ void *addr,
+ u32 length,
+ u32 value)
+{
+ u32 crc = 0;
+ int ret;
+ enum i40iw_status_code ret_code = 0;
+
+ crypto_shash_init(desc);
+ ret = crypto_shash_update(desc, addr, length);
+ if (!ret)
+ crypto_shash_final(desc, (u8 *)&crc);
+ if (crc != value) {
+ i40iw_pr_err("mpa crc check fail\n");
+ ret_code = I40IW_ERR_MPA_CRC;
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_ieq_get_qp - get qp based on quad in puda buffer
+ * @dev: hardware control device structure
+ * @buf: receive puda buffer on exception q
+ */
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
+ struct i40iw_puda_buf *buf)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_qp *iwqp;
+ struct i40iw_cm_node *cm_node;
+ u32 loc_addr[4], rem_addr[4];
+ u16 loc_port, rem_port;
+ struct ipv6hdr *ip6h;
+ struct iphdr *iph = (struct iphdr *)buf->iph;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+ if (iph->version == 4) {
+ memset(loc_addr, 0, sizeof(loc_addr));
+ loc_addr[0] = ntohl(iph->daddr);
+ memset(rem_addr, 0, sizeof(rem_addr));
+ rem_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ i40iw_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
+ i40iw_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
+ }
+ loc_port = ntohs(tcph->dest);
+ rem_port = ntohs(tcph->source);
+
+ cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
+ loc_addr, false);
+ if (!cm_node)
+ return NULL;
+ iwqp = cm_node->iwqp;
+ return &iwqp->sc_qp;
+}
+
+/**
+ * i40iw_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @length: length of buffer
+ * @seqnum: seq number for tcp
+ */
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum)
+{
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ u16 iphlen;
+ u16 packetsize;
+ u8 *addr = (u8 *)buf->mem.va;
+
+ iphlen = (buf->ipv4) ? 20 : 40;
+ iph = (struct iphdr *)(addr + buf->maclen);
+ tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
+ packetsize = length + buf->tcphlen + iphlen;
+
+ iph->tot_len = htons(packetsize);
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * i40iw_puda_get_tcpip_info - get tcpip info from puda buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+ struct i40iw_puda_buf *buf)
+{
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ u16 iphlen;
+ u16 pkt_len;
+ u8 *mem = (u8 *)buf->mem.va;
+ struct ethhdr *ethh = (struct ethhdr *)buf->mem.va;
+
+ if (ethh->h_proto == htons(0x8100)) {
+ info->vlan_valid = true;
+ buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) & VLAN_VID_MASK;
+ }
+ buf->maclen = (info->vlan_valid) ? 18 : 14;
+ iphlen = (info->l3proto) ? 40 : 20;
+ buf->ipv4 = (info->l3proto) ? false : true;
+ buf->iph = mem + buf->maclen;
+ iph = (struct iphdr *)buf->iph;
+
+ buf->tcph = buf->iph + iphlen;
+ tcph = (struct tcphdr *)buf->tcph;
+
+ if (buf->ipv4) {
+ pkt_len = ntohs(iph->tot_len);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ pkt_len = ntohs(ip6h->payload_len) + iphlen;
+ }
+
+ buf->totallen = pkt_len + buf->maclen;
+
+ if (info->payload_len < buf->totallen - 4) {
+ i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
+ info->payload_len, buf->totallen);
+ return I40IW_ERR_INVALID_SIZE;
+ }
+
+ buf->tcphlen = (tcph->doff) << 2;
+ buf->datalen = pkt_len - iphlen - buf->tcphlen;
+ buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+ return 0;
+}
+
+/**
+ * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
+ * @dev: hardware control device structure
+ */
+static void i40iw_hw_stats_timeout(unsigned long dev)
+{
+ struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev;
+ struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat;
+ struct i40iw_dev_pestat *vf_devstat = NULL;
+ u16 iw_vf_idx;
+ unsigned long flags;
+
+ /*PF*/
+ pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats);
+ for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
+ spin_lock_irqsave(&pf_devstat->stats_lock, flags);
+ if (pf_dev->vf_dev[iw_vf_idx]) {
+ if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
+ vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat;
+ vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats);
+ }
+ }
+ spin_unlock_irqrestore(&pf_devstat->stats_lock, flags);
+ }
+
+ mod_timer(&pf_devstat->stats_timer,
+ jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * i40iw_hw_stats_start_timer - Start periodic stats timer
+ * @dev: hardware control device structure
+ */
+void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev)
+{
+ struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+
+ init_timer(&devstat->stats_timer);
+ devstat->stats_timer.function = i40iw_hw_stats_timeout;
+ devstat->stats_timer.data = (unsigned long)dev;
+ mod_timer(&devstat->stats_timer,
+ jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * i40iw_hw_stats_del_timer - Delete periodic stats timer
+ * @dev: hardware control device structure
+ */
+void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev)
+{
+ struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+
+ del_timer_sync(&devstat->stats_timer);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
new file mode 100644
index 000000000000..1fe3b84a06e4
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -0,0 +1,2437 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <asm/byteorder.h>
+#include <net/ip.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include "i40iw.h"
+
+/**
+ * i40iw_query_device - get device attributes
+ * @ibdev: device pointer from stack
+ * @props: returning device attributes
+ * @udata: user data
+ */
+static int i40iw_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+
+ if (udata->inlen || udata->outlen)
+ return -EINVAL;
+ memset(props, 0, sizeof(*props));
+ ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
+ props->fw_ver = I40IW_FW_VERSION;
+ props->device_cap_flags = iwdev->device_cap_flags;
+ props->vendor_id = iwdev->vendor_id;
+ props->vendor_part_id = iwdev->vendor_part_id;
+ props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
+ props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
+ props->max_qp = iwdev->max_qp;
+ props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
+ props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ props->max_cq = iwdev->max_cq;
+ props->max_cqe = iwdev->max_cqe;
+ props->max_mr = iwdev->max_mr;
+ props->max_pd = iwdev->max_pd;
+ props->max_sge_rd = 1;
+ props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
+ props->max_qp_init_rd_atom = props->max_qp_rd_atom;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->max_map_per_fmr = 1;
+ return 0;
+}
+
+/**
+ * i40iw_query_port - get port attrubutes
+ * @ibdev: device pointer from stack
+ * @port: port number for query
+ * @props: returning device attributes
+ */
+static int i40iw_query_port(struct ib_device *ibdev,
+ u8 port,
+ struct ib_port_attr *props)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct net_device *netdev = iwdev->netdev;
+
+ memset(props, 0, sizeof(*props));
+
+ props->max_mtu = IB_MTU_4096;
+ if (netdev->mtu >= 4096)
+ props->active_mtu = IB_MTU_4096;
+ else if (netdev->mtu >= 2048)
+ props->active_mtu = IB_MTU_2048;
+ else if (netdev->mtu >= 1024)
+ props->active_mtu = IB_MTU_1024;
+ else if (netdev->mtu >= 512)
+ props->active_mtu = IB_MTU_512;
+ else
+ props->active_mtu = IB_MTU_256;
+
+ props->lid = 1;
+ if (netif_carrier_ok(iwdev->netdev))
+ props->state = IB_PORT_ACTIVE;
+ else
+ props->state = IB_PORT_DOWN;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = 1;
+ props->max_msg_sz = 0x80000000;
+ return 0;
+}
+
+/**
+ * i40iw_alloc_ucontext - Allocate the user context data structure
+ * @ibdev: device pointer from stack
+ * @udata: user data
+ *
+ * This keeps track of all objects associated with a particular
+ * user-mode client.
+ */
+static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_alloc_ucontext_req req;
+ struct i40iw_alloc_ucontext_resp uresp;
+ struct i40iw_ucontext *ucontext;
+
+ if (ib_copy_from_udata(&req, udata, sizeof(req)))
+ return ERR_PTR(-EINVAL);
+
+ if (req.userspace_ver != I40IW_ABI_USERSPACE_VER) {
+ i40iw_pr_err("Invalid userspace driver version detected. Detected version %d, should be %d\n",
+ req.userspace_ver, I40IW_ABI_USERSPACE_VER);
+ return ERR_PTR(-EINVAL);
+ }
+
+ memset(&uresp, 0, sizeof(uresp));
+ uresp.max_qps = iwdev->max_qp;
+ uresp.max_pds = iwdev->max_pd;
+ uresp.wq_size = iwdev->max_qp_wr * 2;
+ uresp.kernel_ver = I40IW_ABI_KERNEL_VER;
+
+ ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
+ if (!ucontext)
+ return ERR_PTR(-ENOMEM);
+
+ ucontext->iwdev = iwdev;
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+ kfree(ucontext);
+ return ERR_PTR(-EFAULT);
+ }
+
+ INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
+ spin_lock_init(&ucontext->cq_reg_mem_list_lock);
+ INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
+ spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+
+ return &ucontext->ibucontext;
+}
+
+/**
+ * i40iw_dealloc_ucontext - deallocate the user context data structure
+ * @context: user context created during alloc
+ */
+static int i40iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct i40iw_ucontext *ucontext = to_ucontext(context);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ if (!list_empty(&ucontext->cq_reg_mem_list)) {
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ return -EBUSY;
+ }
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ if (!list_empty(&ucontext->qp_reg_mem_list)) {
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+ return -EBUSY;
+ }
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ kfree(ucontext);
+ return 0;
+}
+
+/**
+ * i40iw_mmap - user memory map
+ * @context: context created during alloc
+ * @vma: kernel info for user memory map
+ */
+static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct i40iw_ucontext *ucontext;
+ u64 db_addr_offset;
+ u64 push_offset;
+
+ ucontext = to_ucontext(context);
+ if (ucontext->iwdev->sc_dev.is_pf) {
+ db_addr_offset = I40IW_DB_ADDR_OFFSET;
+ push_offset = I40IW_PUSH_OFFSET;
+ if (vma->vm_pgoff)
+ vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
+ } else {
+ db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
+ push_offset = I40IW_VF_PUSH_OFFSET;
+ if (vma->vm_pgoff)
+ vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
+ }
+
+ vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
+
+ if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_private_data = ucontext;
+ } else {
+ if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ else
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT),
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * i40iw_alloc_push_page - allocate a push page for qp
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ */
+static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ enum i40iw_status_code status;
+
+ if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
+ return;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return;
+
+ atomic_inc(&cqp_request->refcount);
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+
+ cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 0;
+ cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (!status)
+ qp->push_idx = cqp_request->compl_info.op_ret_val;
+ else
+ i40iw_pr_err("CQP-OP Push page fail");
+ i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+}
+
+/**
+ * i40iw_dealloc_push_page - free a push page for qp
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ */
+static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ enum i40iw_status_code status;
+
+ if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
+ return;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+
+ cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
+ cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 1;
+ cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (!status)
+ qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+ else
+ i40iw_pr_err("CQP-OP Push page fail");
+}
+
+/**
+ * i40iw_alloc_pd - allocate protection domain
+ * @ibdev: device pointer from stack
+ * @context: user context created during alloc
+ * @udata: user data
+ */
+static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct i40iw_pd *iwpd;
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_alloc_pd_resp uresp;
+ struct i40iw_sc_pd *sc_pd;
+ u32 pd_id = 0;
+ int err;
+
+ err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
+ iwdev->max_pd, &pd_id, &iwdev->next_pd);
+ if (err) {
+ i40iw_pr_err("alloc resource failed\n");
+ return ERR_PTR(err);
+ }
+
+ iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL);
+ if (!iwpd) {
+ err = -ENOMEM;
+ goto free_res;
+ }
+
+ sc_pd = &iwpd->sc_pd;
+ dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id);
+
+ if (context) {
+ memset(&uresp, 0, sizeof(uresp));
+ uresp.pd_id = pd_id;
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+ err = -EFAULT;
+ goto error;
+ }
+ }
+
+ i40iw_add_pdusecount(iwpd);
+ return &iwpd->ibpd;
+error:
+ kfree(iwpd);
+free_res:
+ i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
+ return ERR_PTR(err);
+}
+
+/**
+ * i40iw_dealloc_pd - deallocate pd
+ * @ibpd: ptr of pd to be deallocated
+ */
+static int i40iw_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct i40iw_pd *iwpd = to_iwpd(ibpd);
+ struct i40iw_device *iwdev = to_iwdev(ibpd->device);
+
+ i40iw_rem_pdusecount(iwpd, iwdev);
+ return 0;
+}
+
+/**
+ * i40iw_qp_roundup - return round up qp ring size
+ * @wr_ring_size: ring size to round up
+ */
+static int i40iw_qp_roundup(u32 wr_ring_size)
+{
+ int scount = 1;
+
+ if (wr_ring_size < I40IWQP_SW_MIN_WQSIZE)
+ wr_ring_size = I40IWQP_SW_MIN_WQSIZE;
+
+ for (wr_ring_size--; scount <= 16; scount *= 2)
+ wr_ring_size |= wr_ring_size >> scount;
+ return ++wr_ring_size;
+}
+
+/**
+ * i40iw_get_pbl - Retrieve pbl from a list given a virtual
+ * address
+ * @va: user virtual address
+ * @pbl_list: pbl list to search in (QP's or CQ's)
+ */
+static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
+ struct list_head *pbl_list)
+{
+ struct i40iw_pbl *iwpbl;
+
+ list_for_each_entry(iwpbl, pbl_list, list) {
+ if (iwpbl->user_base == va) {
+ list_del(&iwpbl->list);
+ return iwpbl;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * i40iw_free_qp_resources - free up memory resources for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @qp_num: qp number assigned
+ */
+void i40iw_free_qp_resources(struct i40iw_device *iwdev,
+ struct i40iw_qp *iwqp,
+ u32 qp_num)
+{
+ i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
+ if (qp_num)
+ i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
+ i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
+ i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
+ kfree(iwqp->kqp.wrid_mem);
+ iwqp->kqp.wrid_mem = NULL;
+ kfree(iwqp->allocated_buffer);
+ iwqp->allocated_buffer = NULL;
+}
+
+/**
+ * i40iw_clean_cqes - clean cq entries for qp
+ * @iwqp: qp ptr (user or kernel)
+ * @iwcq: cq ptr
+ */
+static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
+{
+ struct i40iw_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
+
+ ukcq->ops.iw_cq_clean(&iwqp->sc_qp.qp_uk, ukcq);
+}
+
+/**
+ * i40iw_destroy_qp - destroy qp
+ * @ibqp: qp's ib pointer also to get to device's qp address
+ */
+static int i40iw_destroy_qp(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+
+ iwqp->destroyed = 1;
+
+ if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
+ i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 0, 0, 0);
+
+ if (!iwqp->user_mode) {
+ if (iwqp->iwscq) {
+ i40iw_clean_cqes(iwqp, iwqp->iwscq);
+ if (iwqp->iwrcq != iwqp->iwscq)
+ i40iw_clean_cqes(iwqp, iwqp->iwrcq);
+ }
+ }
+
+ i40iw_rem_ref(&iwqp->ibqp);
+ return 0;
+}
+
+/**
+ * i40iw_setup_virt_qp - setup for allocation of virtual qp
+ * @dev: iwarp device
+ * @qp: qp ptr
+ * @init_info: initialize info to return
+ */
+static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
+ struct i40iw_qp *iwqp,
+ struct i40iw_qp_init_info *init_info)
+{
+ struct i40iw_pbl *iwpbl = iwqp->iwpbl;
+ struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
+
+ iwqp->page = qpmr->sq_page;
+ init_info->shadow_area_pa = cpu_to_le64(qpmr->shadow);
+ if (iwpbl->pbl_allocated) {
+ init_info->virtual_map = true;
+ init_info->sq_pa = qpmr->sq_pbl.idx;
+ init_info->rq_pa = qpmr->rq_pbl.idx;
+ } else {
+ init_info->sq_pa = qpmr->sq_pbl.addr;
+ init_info->rq_pa = qpmr->rq_pbl.addr;
+ }
+ return 0;
+}
+
+/**
+ * i40iw_setup_kmode_qp - setup initialization for kernel mode qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ */
+static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
+ struct i40iw_qp *iwqp,
+ struct i40iw_qp_init_info *info)
+{
+ struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
+ u32 sqdepth, rqdepth;
+ u32 sq_size, rq_size;
+ u8 sqshift, rqshift;
+ u32 size;
+ enum i40iw_status_code status;
+ struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+
+ ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
+ sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
+ rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
+
+ status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift);
+ if (!status)
+ status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift);
+
+ if (status)
+ return -ENOSYS;
+
+ sqdepth = sq_size << sqshift;
+ rqdepth = rq_size << rqshift;
+
+ size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
+ iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
+
+ ukinfo->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)iwqp->kqp.wrid_mem;
+ if (!ukinfo->sq_wrtrk_array)
+ return -ENOMEM;
+
+ ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
+
+ size = (sqdepth + rqdepth) * I40IW_QP_WQE_MIN_SIZE;
+ size += (I40IW_SHADOW_AREA_SIZE << 3);
+
+ status = i40iw_allocate_dma_mem(iwdev->sc_dev.hw, mem, size, 256);
+ if (status) {
+ kfree(ukinfo->sq_wrtrk_array);
+ ukinfo->sq_wrtrk_array = NULL;
+ return -ENOMEM;
+ }
+
+ ukinfo->sq = mem->va;
+ info->sq_pa = mem->pa;
+
+ ukinfo->rq = &ukinfo->sq[sqdepth];
+ info->rq_pa = info->sq_pa + (sqdepth * I40IW_QP_WQE_MIN_SIZE);
+
+ ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
+ info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
+
+ ukinfo->sq_size = sq_size;
+ ukinfo->rq_size = rq_size;
+ ukinfo->qp_id = iwqp->ibqp.qp_num;
+ return 0;
+}
+
+/**
+ * i40iw_create_qp - create qp
+ * @ibpd: ptr of pd
+ * @init_attr: attributes for qp
+ * @udata: user data for create qp
+ */
+static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct i40iw_pd *iwpd = to_iwpd(ibpd);
+ struct i40iw_device *iwdev = to_iwdev(ibpd->device);
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_qp *iwqp;
+ struct i40iw_ucontext *ucontext;
+ struct i40iw_create_qp_req req;
+ struct i40iw_create_qp_resp uresp;
+ u32 qp_num = 0;
+ void *mem;
+ enum i40iw_status_code ret;
+ int err_code;
+ int sq_size;
+ int rq_size;
+ struct i40iw_sc_qp *qp;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_qp_init_info init_info;
+ struct i40iw_create_qp_info *qp_info;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ struct i40iw_qp_host_ctx_info *ctx_info;
+ struct i40iwarp_offload_info *iwarp_info;
+ unsigned long flags;
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+ if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
+ init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+
+ memset(&init_info, 0, sizeof(init_info));
+
+ sq_size = init_attr->cap.max_send_wr;
+ rq_size = init_attr->cap.max_recv_wr;
+
+ init_info.qp_uk_init_info.sq_size = sq_size;
+ init_info.qp_uk_init_info.rq_size = rq_size;
+ init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
+ init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+
+ mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ iwqp = (struct i40iw_qp *)mem;
+ qp = &iwqp->sc_qp;
+ qp->back_qp = (void *)iwqp;
+ qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+
+ iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
+
+ if (i40iw_allocate_dma_mem(dev->hw,
+ &iwqp->q2_ctx_mem,
+ I40IW_Q2_BUFFER_SIZE + I40IW_QP_CTX_SIZE,
+ 256)) {
+ i40iw_pr_err("dma_mem failed\n");
+ err_code = -ENOMEM;
+ goto error;
+ }
+
+ init_info.q2 = iwqp->q2_ctx_mem.va;
+ init_info.q2_pa = iwqp->q2_ctx_mem.pa;
+
+ init_info.host_ctx = (void *)init_info.q2 + I40IW_Q2_BUFFER_SIZE;
+ init_info.host_ctx_pa = init_info.q2_pa + I40IW_Q2_BUFFER_SIZE;
+
+ err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_qps, iwdev->max_qp,
+ &qp_num, &iwdev->next_qp);
+ if (err_code) {
+ i40iw_pr_err("qp resource\n");
+ goto error;
+ }
+
+ iwqp->allocated_buffer = mem;
+ iwqp->iwdev = iwdev;
+ iwqp->iwpd = iwpd;
+ iwqp->ibqp.qp_num = qp_num;
+ qp = &iwqp->sc_qp;
+ iwqp->iwscq = to_iwcq(init_attr->send_cq);
+ iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
+
+ iwqp->host_ctx.va = init_info.host_ctx;
+ iwqp->host_ctx.pa = init_info.host_ctx_pa;
+ iwqp->host_ctx.size = I40IW_QP_CTX_SIZE;
+
+ init_info.pd = &iwpd->sc_pd;
+ init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
+ iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
+
+ if (init_attr->qp_type != IB_QPT_RC) {
+ err_code = -ENOSYS;
+ goto error;
+ }
+ if (iwdev->push_mode)
+ i40iw_alloc_push_page(iwdev, qp);
+ if (udata) {
+ err_code = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (err_code) {
+ i40iw_pr_err("ib_copy_from_data\n");
+ goto error;
+ }
+ iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+ if (ibpd->uobject && ibpd->uobject->context) {
+ iwqp->user_mode = 1;
+ ucontext = to_ucontext(ibpd->uobject->context);
+
+ if (req.user_wqe_buffers) {
+ spin_lock_irqsave(
+ &ucontext->qp_reg_mem_list_lock, flags);
+ iwqp->iwpbl = i40iw_get_pbl(
+ (unsigned long)req.user_wqe_buffers,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(
+ &ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwqp->iwpbl) {
+ err_code = -ENODATA;
+ i40iw_pr_err("no pbl info\n");
+ goto error;
+ }
+ }
+ }
+ err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
+ } else {
+ err_code = i40iw_setup_kmode_qp(iwdev, iwqp, &init_info);
+ }
+
+ if (err_code) {
+ i40iw_pr_err("setup qp failed\n");
+ goto error;
+ }
+
+ init_info.type = I40IW_QP_TYPE_IWARP;
+ ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
+ if (ret) {
+ err_code = -EPROTO;
+ i40iw_pr_err("qp_init fail\n");
+ goto error;
+ }
+ ctx_info = &iwqp->ctx_info;
+ iwarp_info = &iwqp->iwarp_info;
+ iwarp_info->rd_enable = true;
+ iwarp_info->wr_rdresp_en = true;
+ if (!iwqp->user_mode)
+ iwarp_info->priv_mode_en = true;
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->rdmap_ver = 1;
+
+ ctx_info->iwarp_info_valid = true;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
+ ctx_info->push_mode_en = false;
+ } else {
+ ctx_info->push_mode_en = true;
+ ctx_info->push_idx = qp->push_idx;
+ }
+
+ ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ (u64 *)iwqp->host_ctx.va,
+ ctx_info);
+ ctx_info->iwarp_info_valid = false;
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto error;
+ }
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+
+ memset(qp_info, 0, sizeof(*qp_info));
+
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = I40IW_QP_STATE_IDLE;
+
+ cqp_info->cqp_cmd = OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ ret = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (ret) {
+ i40iw_pr_err("CQP-OP QP create fail");
+ err_code = -EACCES;
+ goto error;
+ }
+
+ i40iw_add_ref(&iwqp->ibqp);
+ spin_lock_init(&iwqp->lock);
+ iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+ iwdev->qp_table[qp_num] = iwqp;
+ i40iw_add_pdusecount(iwqp->iwpd);
+ if (ibpd->uobject && udata) {
+ memset(&uresp, 0, sizeof(uresp));
+ uresp.actual_sq_size = sq_size;
+ uresp.actual_rq_size = rq_size;
+ uresp.qp_id = qp_num;
+ uresp.push_idx = qp->push_idx;
+ err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (err_code) {
+ i40iw_pr_err("copy_to_udata failed\n");
+ i40iw_destroy_qp(&iwqp->ibqp);
+ /* let the completion of the qp destroy free the qp */
+ return ERR_PTR(err_code);
+ }
+ }
+
+ return &iwqp->ibqp;
+error:
+ i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+ kfree(mem);
+ return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_query - query qp attributes
+ * @ibqp: qp pointer
+ * @attr: attributes pointer
+ * @attr_mask: Not used
+ * @init_attr: qp attributes to return
+ */
+static int i40iw_query_qp(struct ib_qp *ibqp,
+ struct ib_qp_attr *attr,
+ int attr_mask,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+ attr->qp_access_flags = 0;
+ attr->cap.max_send_wr = qp->qp_uk.sq_size;
+ attr->cap.max_recv_wr = qp->qp_uk.rq_size;
+ attr->cap.max_recv_sge = 1;
+ attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+ init_attr->event_handler = iwqp->ibqp.event_handler;
+ init_attr->qp_context = iwqp->ibqp.qp_context;
+ init_attr->send_cq = iwqp->ibqp.send_cq;
+ init_attr->recv_cq = iwqp->ibqp.recv_cq;
+ init_attr->srq = iwqp->ibqp.srq;
+ init_attr->cap = attr->cap;
+ return 0;
+}
+
+/**
+ * i40iw_hw_modify_qp - setup cqp for modify qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: info for modify qp
+ * @wait: flag to wait or not for modify qp completion
+ */
+void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
+ struct i40iw_modify_qp_info *info, bool wait)
+{
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_modify_qp_info *m_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Modify QP fail");
+}
+
+/**
+ * i40iw_modify_qp - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct i40iw_device *iwdev = iwqp->iwdev;
+ struct i40iw_qp_host_ctx_info *ctx_info;
+ struct i40iwarp_offload_info *iwarp_info;
+ struct i40iw_modify_qp_info info;
+ u8 issue_modify_qp = 0;
+ u8 dont_wait = 0;
+ u32 err;
+ unsigned long flags;
+
+ memset(&info, 0, sizeof(info));
+ ctx_info = &iwqp->ctx_info;
+ iwarp_info = &iwqp->iwarp_info;
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+
+ if (attr_mask & IB_QP_STATE) {
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ case IB_QPS_RTR:
+ if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_IDLE) {
+ err = -EINVAL;
+ goto exit;
+ }
+ if (iwqp->iwarp_state == I40IW_QP_STATE_INVALID) {
+ info.next_iwarp_state = I40IW_QP_STATE_IDLE;
+ issue_modify_qp = 1;
+ }
+ break;
+ case IB_QPS_RTS:
+ if ((iwqp->iwarp_state > (u32)I40IW_QP_STATE_RTS) ||
+ (!iwqp->cm_id)) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ issue_modify_qp = 1;
+ iwqp->hw_tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+ iwqp->hte_added = 1;
+ info.next_iwarp_state = I40IW_QP_STATE_RTS;
+ info.tcp_ctx_valid = true;
+ info.ord_valid = true;
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ break;
+ case IB_QPS_SQD:
+ if (iwqp->hw_iwarp_state > (u32)I40IW_QP_STATE_RTS) {
+ err = 0;
+ goto exit;
+ }
+ if ((iwqp->iwarp_state == (u32)I40IW_QP_STATE_CLOSING) ||
+ (iwqp->iwarp_state < (u32)I40IW_QP_STATE_RTS)) {
+ err = 0;
+ goto exit;
+ }
+ if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_CLOSING) {
+ err = -EINVAL;
+ goto exit;
+ }
+ info.next_iwarp_state = I40IW_QP_STATE_CLOSING;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_SQE:
+ if (iwqp->iwarp_state >= (u32)I40IW_QP_STATE_TERMINATE) {
+ err = -EINVAL;
+ goto exit;
+ }
+ info.next_iwarp_state = I40IW_QP_STATE_TERMINATE;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_ERR:
+ case IB_QPS_RESET:
+ if (iwqp->iwarp_state == (u32)I40IW_QP_STATE_ERROR) {
+ err = -EINVAL;
+ goto exit;
+ }
+ if (iwqp->sc_qp.term_flags)
+ del_timer(&iwqp->terminate_timer);
+ info.next_iwarp_state = I40IW_QP_STATE_ERROR;
+ if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
+ iwdev->iw_status &&
+ (iwqp->hw_tcp_state != I40IW_TCP_STATE_TIME_WAIT))
+ info.reset_tcp_conn = true;
+ else
+ dont_wait = 1;
+ issue_modify_qp = 1;
+ info.next_iwarp_state = I40IW_QP_STATE_ERROR;
+ break;
+ default:
+ err = -EINVAL;
+ goto exit;
+ }
+
+ iwqp->ibqp_state = attr->qp_state;
+
+ if (issue_modify_qp)
+ iwqp->iwarp_state = info.next_iwarp_state;
+ else
+ info.next_iwarp_state = iwqp->iwarp_state;
+ }
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ ctx_info->iwarp_info_valid = true;
+ if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+ iwarp_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ iwarp_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ iwarp_info->rd_enable = true;
+ if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+ iwarp_info->bind_en = true;
+
+ if (iwqp->user_mode) {
+ iwarp_info->rd_enable = true;
+ iwarp_info->wr_rdresp_en = true;
+ iwarp_info->priv_mode_en = false;
+ }
+ }
+
+ if (ctx_info->iwarp_info_valid) {
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ int ret;
+
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ (u64 *)iwqp->host_ctx.va,
+ ctx_info);
+ if (ret) {
+ i40iw_pr_err("setting QP context\n");
+ err = -EINVAL;
+ goto exit;
+ }
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ if (issue_modify_qp)
+ i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
+
+ if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
+ if (dont_wait) {
+ if (iwqp->cm_id && iwqp->hw_tcp_state) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
+ iwqp->last_aeq = I40IW_AE_RESET_SENT;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ }
+ }
+ return 0;
+exit:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ return err;
+}
+
+/**
+ * cq_free_resources - free up recources for cq
+ * @iwdev: iwarp device
+ * @iwcq: cq ptr
+ */
+static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
+{
+ struct i40iw_sc_cq *cq = &iwcq->sc_cq;
+
+ if (!iwcq->user_mode)
+ i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwcq->kmem);
+ i40iw_free_resource(iwdev, iwdev->allocated_cqs, cq->cq_uk.cq_id);
+}
+
+/**
+ * cq_wq_destroy - send cq destroy cqp
+ * @iwdev: iwarp device
+ * @cq: hardware control cq
+ */
+static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
+{
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+
+ cqp_info->cqp_cmd = OP_CQ_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_destroy.cq = cq;
+ cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Destroy QP fail");
+}
+
+/**
+ * i40iw_destroy_cq - destroy cq
+ * @ib_cq: cq pointer
+ */
+static int i40iw_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct i40iw_cq *iwcq;
+ struct i40iw_device *iwdev;
+ struct i40iw_sc_cq *cq;
+
+ if (!ib_cq) {
+ i40iw_pr_err("ib_cq == NULL\n");
+ return 0;
+ }
+
+ iwcq = to_iwcq(ib_cq);
+ iwdev = to_iwdev(ib_cq->device);
+ cq = &iwcq->sc_cq;
+ cq_wq_destroy(iwdev, cq);
+ cq_free_resources(iwdev, iwcq);
+ kfree(iwcq);
+ return 0;
+}
+
+/**
+ * i40iw_create_cq - create cq
+ * @ibdev: device pointer from stack
+ * @attr: attributes for cq
+ * @context: user context created during alloc
+ * @udata: user data
+ */
+static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_cq *iwcq;
+ struct i40iw_pbl *iwpbl;
+ u32 cq_num = 0;
+ struct i40iw_sc_cq *cq;
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_cq_init_info info;
+ enum i40iw_status_code status;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+ unsigned long flags;
+ int err_code;
+ int entries = attr->cqe;
+
+ if (entries > iwdev->max_cqe)
+ return ERR_PTR(-EINVAL);
+
+ iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
+ if (!iwcq)
+ return ERR_PTR(-ENOMEM);
+
+ memset(&info, 0, sizeof(info));
+
+ err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
+ iwdev->max_cq, &cq_num,
+ &iwdev->next_cq);
+ if (err_code)
+ goto error;
+
+ cq = &iwcq->sc_cq;
+ cq->back_cq = (void *)iwcq;
+ spin_lock_init(&iwcq->lock);
+
+ info.dev = dev;
+ ukinfo->cq_size = max(entries, 4);
+ ukinfo->cq_id = cq_num;
+ iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
+ info.ceqe_mask = 0;
+ info.ceq_id = 0;
+ info.ceq_id_valid = true;
+ info.ceqe_mask = 1;
+ info.type = I40IW_CQ_TYPE_IWARP;
+ if (context) {
+ struct i40iw_ucontext *ucontext;
+ struct i40iw_create_cq_req req;
+ struct i40iw_cq_mr *cqmr;
+
+ memset(&req, 0, sizeof(req));
+ iwcq->user_mode = true;
+ ucontext = to_ucontext(context);
+ if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req)))
+ goto cq_free_resources;
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ if (!iwpbl) {
+ err_code = -EPROTO;
+ goto cq_free_resources;
+ }
+
+ iwcq->iwpbl = iwpbl;
+ iwcq->cq_mem_size = 0;
+ cqmr = &iwpbl->cq_mr;
+ info.shadow_area_pa = cpu_to_le64(cqmr->shadow);
+ if (iwpbl->pbl_allocated) {
+ info.virtual_map = true;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+ } else {
+ info.cq_base_pa = cqmr->cq_pbl.addr;
+ }
+ } else {
+ /* Kmode allocations */
+ int rsize;
+ int shadow;
+
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct i40iw_cqe);
+ rsize = round_up(rsize, 256);
+ shadow = I40IW_SHADOW_AREA_SIZE << 3;
+ status = i40iw_allocate_dma_mem(dev->hw, &iwcq->kmem,
+ rsize + shadow, 256);
+ if (status) {
+ err_code = -ENOMEM;
+ goto cq_free_resources;
+ }
+ ukinfo->cq_base = iwcq->kmem.va;
+ info.cq_base_pa = iwcq->kmem.pa;
+ info.shadow_area_pa = info.cq_base_pa + rsize;
+ ukinfo->shadow_area = iwcq->kmem.va + rsize;
+ }
+
+ if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
+ i40iw_pr_err("init cq fail\n");
+ err_code = -EPROTO;
+ goto cq_free_resources;
+ }
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto cq_free_resources;
+ }
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status) {
+ i40iw_pr_err("CQP-OP Create QP fail");
+ err_code = -EPROTO;
+ goto cq_free_resources;
+ }
+
+ if (context) {
+ struct i40iw_create_cq_resp resp;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.cq_id = info.cq_uk_init_info.cq_id;
+ resp.cq_size = info.cq_uk_init_info.cq_size;
+ if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+ i40iw_pr_err("copy to user data\n");
+ err_code = -EPROTO;
+ goto cq_destroy;
+ }
+ }
+
+ return (struct ib_cq *)iwcq;
+
+cq_destroy:
+ cq_wq_destroy(iwdev, cq);
+cq_free_resources:
+ cq_free_resources(iwdev, iwcq);
+error:
+ kfree(iwcq);
+ return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_get_user_access - get hw access from IB access
+ * @acc: IB access to return hw access
+ */
+static inline u16 i40iw_get_user_access(int acc)
+{
+ u16 access = 0;
+
+ access |= (acc & IB_ACCESS_LOCAL_WRITE) ? I40IW_ACCESS_FLAGS_LOCALWRITE : 0;
+ access |= (acc & IB_ACCESS_REMOTE_WRITE) ? I40IW_ACCESS_FLAGS_REMOTEWRITE : 0;
+ access |= (acc & IB_ACCESS_REMOTE_READ) ? I40IW_ACCESS_FLAGS_REMOTEREAD : 0;
+ access |= (acc & IB_ACCESS_MW_BIND) ? I40IW_ACCESS_FLAGS_BIND_WINDOW : 0;
+ return access;
+}
+
+/**
+ * i40iw_free_stag - free stag resource
+ * @iwdev: iwarp device
+ * @stag: stag to free
+ */
+static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
+{
+ u32 stag_idx;
+
+ stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+ i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
+}
+
+/**
+ * i40iw_create_stag - create random stag
+ * @iwdev: iwarp device
+ */
+static u32 i40iw_create_stag(struct i40iw_device *iwdev)
+{
+ u32 stag = 0;
+ u32 stag_index = 0;
+ u32 next_stag_index;
+ u32 driver_key;
+ u32 random;
+ u8 consumer_key;
+ int ret;
+
+ get_random_bytes(&random, sizeof(random));
+ consumer_key = (u8)random;
+
+ driver_key = random & ~iwdev->mr_stagmask;
+ next_stag_index = (random & iwdev->mr_stagmask) >> 8;
+ next_stag_index %= iwdev->max_mr;
+
+ ret = i40iw_alloc_resource(iwdev,
+ iwdev->allocated_mrs, iwdev->max_mr,
+ &stag_index, &next_stag_index);
+ if (!ret) {
+ stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
+ stag |= driver_key;
+ stag += (u32)consumer_key;
+ }
+ return stag;
+}
+
+/**
+ * i40iw_next_pbl_addr - Get next pbl address
+ * @palloc: Poiner to allocated pbles
+ * @pbl: pointer to a pble
+ * @pinfo: info pointer
+ * @idx: index
+ */
+static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc,
+ u64 *pbl,
+ struct i40iw_pble_info **pinfo,
+ u32 *idx)
+{
+ *idx += 1;
+ if ((!(*pinfo)) || (*idx != (*pinfo)->cnt))
+ return ++pbl;
+ *idx = 0;
+ (*pinfo)++;
+ return (u64 *)(*pinfo)->addr;
+}
+
+/**
+ * i40iw_copy_user_pgaddrs - copy user page address to pble's os locally
+ * @iwmr: iwmr for IB's user page addresses
+ * @pbl: ple pointer to save 1 level or 0 level pble
+ * @level: indicated level 0, 1 or 2
+ */
+static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
+ u64 *pbl,
+ enum i40iw_pble_level level)
+{
+ struct ib_umem *region = iwmr->region;
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ int chunk_pages, entry, pg_shift, i;
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct i40iw_pble_info *pinfo;
+ struct scatterlist *sg;
+ u32 idx = 0;
+
+ pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
+ pg_shift = ffs(region->page_size) - 1;
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ chunk_pages = sg_dma_len(sg) >> pg_shift;
+ if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
+ !iwpbl->qp_mr.sq_page)
+ iwpbl->qp_mr.sq_page = sg_page(sg);
+ for (i = 0; i < chunk_pages; i++) {
+ *pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i);
+ pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx);
+ }
+ }
+}
+
+/**
+ * i40iw_setup_pbles - copy user pg address to pble's
+ * @iwdev: iwarp device
+ * @iwmr: mr pointer for this memory registration
+ * @use_pbles: flag if to use pble's or memory (level 0)
+ */
+static int i40iw_setup_pbles(struct i40iw_device *iwdev,
+ struct i40iw_mr *iwmr,
+ bool use_pbles)
+{
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct i40iw_pble_info *pinfo;
+ u64 *pbl;
+ enum i40iw_status_code status;
+ enum i40iw_pble_level level = I40IW_LEVEL_1;
+
+ if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS))
+ return -ENOMEM;
+
+ if (use_pbles) {
+ mutex_lock(&iwdev->pbl_mutex);
+ status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
+ mutex_unlock(&iwdev->pbl_mutex);
+ if (status)
+ return -ENOMEM;
+
+ iwpbl->pbl_allocated = true;
+ level = palloc->level;
+ pinfo = (level == I40IW_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf;
+ pbl = (u64 *)pinfo->addr;
+ } else {
+ pbl = iwmr->pgaddrmem;
+ }
+
+ i40iw_copy_user_pgaddrs(iwmr, pbl, level);
+ return 0;
+}
+
+/**
+ * i40iw_handle_q_mem - handle memory for qp and cq
+ * @iwdev: iwarp device
+ * @req: information for q memory management
+ * @iwpbl: pble struct
+ * @use_pbles: flag to use pble
+ */
+static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
+ struct i40iw_mem_reg_req *req,
+ struct i40iw_pbl *iwpbl,
+ bool use_pbles)
+{
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct i40iw_mr *iwmr = iwpbl->iwmr;
+ struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
+ struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
+ struct i40iw_hmc_pble *hmc_p;
+ u64 *arr = iwmr->pgaddrmem;
+ int err;
+ int total;
+
+ total = req->sq_pages + req->rq_pages + req->cq_pages;
+
+ err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
+ if (err)
+ return err;
+ if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ return -ENOMEM;
+ }
+
+ if (use_pbles)
+ arr = (u64 *)palloc->level1.addr;
+ if (req->reg_type == IW_MEMREG_TYPE_QP) {
+ hmc_p = &qpmr->sq_pbl;
+ qpmr->shadow = (dma_addr_t)arr[total];
+ if (use_pbles) {
+ hmc_p->idx = palloc->level1.idx;
+ hmc_p = &qpmr->rq_pbl;
+ hmc_p->idx = palloc->level1.idx + req->sq_pages;
+ } else {
+ hmc_p->addr = arr[0];
+ hmc_p = &qpmr->rq_pbl;
+ hmc_p->addr = arr[1];
+ }
+ } else { /* CQ */
+ hmc_p = &cqmr->cq_pbl;
+ cqmr->shadow = (dma_addr_t)arr[total];
+ if (use_pbles)
+ hmc_p->idx = palloc->level1.idx;
+ else
+ hmc_p->addr = arr[0];
+ }
+ return err;
+}
+
+/**
+ * i40iw_hwreg_mr - send cqp command for memory registration
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ * @access: access for MR
+ */
+static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
+ struct i40iw_mr *iwmr,
+ u16 access)
+{
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ struct i40iw_reg_ns_stag_info *stag_info;
+ struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ enum i40iw_status_code status;
+ int err = 0;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
+ memset(stag_info, 0, sizeof(*stag_info));
+ stag_info->va = (void *)(unsigned long)iwpbl->user_base;
+ stag_info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+ stag_info->stag_key = (u8)iwmr->stag;
+ stag_info->total_len = iwmr->length;
+ stag_info->access_rights = access;
+ stag_info->pd_id = iwpd->sc_pd.pd_id;
+ stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
+
+ if (iwmr->page_cnt > 1) {
+ if (palloc->level == I40IW_LEVEL_1) {
+ stag_info->first_pm_pbl_index = palloc->level1.idx;
+ stag_info->chunk_size = 1;
+ } else {
+ stag_info->first_pm_pbl_index = palloc->level2.root.idx;
+ stag_info->chunk_size = 3;
+ }
+ } else {
+ stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
+ }
+
+ cqp_info->cqp_cmd = OP_MR_REG_NON_SHARED;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->sc_dev;
+ cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
+
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status) {
+ err = -ENOMEM;
+ i40iw_pr_err("CQP-OP MR Reg fail");
+ }
+ return err;
+}
+
+/**
+ * i40iw_reg_user_mr - Register a user memory region
+ * @pd: ptr of pd
+ * @start: virtual start address
+ * @length: length of mr
+ * @virt: virtual address
+ * @acc: access of mr
+ * @udata: user data
+ */
+static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
+ u64 start,
+ u64 length,
+ u64 virt,
+ int acc,
+ struct ib_udata *udata)
+{
+ struct i40iw_pd *iwpd = to_iwpd(pd);
+ struct i40iw_device *iwdev = to_iwdev(pd->device);
+ struct i40iw_ucontext *ucontext;
+ struct i40iw_pble_alloc *palloc;
+ struct i40iw_pbl *iwpbl;
+ struct i40iw_mr *iwmr;
+ struct ib_umem *region;
+ struct i40iw_mem_reg_req req;
+ u32 pbl_depth = 0;
+ u32 stag = 0;
+ u16 access;
+ u32 region_length;
+ bool use_pbles = false;
+ unsigned long flags;
+ int err = -ENOSYS;
+
+ region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ if (IS_ERR(region))
+ return (struct ib_mr *)region;
+
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ ib_umem_release(region);
+ return ERR_PTR(-EFAULT);
+ }
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr) {
+ ib_umem_release(region);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->region = region;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ ucontext = to_ucontext(pd->uobject->context);
+ region_length = region->length + (start & 0xfff);
+ pbl_depth = region_length >> 12;
+ pbl_depth += (region_length & (4096 - 1)) ? 1 : 0;
+ iwmr->length = region->length;
+
+ iwpbl->user_base = virt;
+ palloc = &iwpbl->pble_alloc;
+
+ iwmr->type = req.reg_type;
+ iwmr->page_cnt = pbl_depth;
+
+ switch (req.reg_type) {
+ case IW_MEMREG_TYPE_QP:
+ use_pbles = ((req.sq_pages + req.rq_pages) > 2);
+ err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ if (err)
+ goto error;
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+ break;
+ case IW_MEMREG_TYPE_CQ:
+ use_pbles = (req.cq_pages > 1);
+ err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ if (err)
+ goto error;
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ break;
+ case IW_MEMREG_TYPE_MEM:
+ access = I40IW_ACCESS_FLAGS_LOCALREAD;
+
+ use_pbles = (iwmr->page_cnt != 1);
+ err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
+ if (err)
+ goto error;
+
+ access |= i40iw_get_user_access(acc);
+ stag = i40iw_create_stag(iwdev);
+ if (!stag) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+
+ err = i40iw_hwreg_mr(iwdev, iwmr, access);
+ if (err) {
+ i40iw_free_stag(iwdev, stag);
+ goto error;
+ }
+ break;
+ default:
+ goto error;
+ }
+
+ iwmr->type = req.reg_type;
+ if (req.reg_type == IW_MEMREG_TYPE_MEM)
+ i40iw_add_pdusecount(iwpd);
+ return &iwmr->ibmr;
+
+error:
+ if (palloc->level != I40IW_LEVEL_0)
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ ib_umem_release(region);
+ kfree(iwmr);
+ return ERR_PTR(err);
+}
+
+/**
+ * i40iw_reg_phys_mr - register kernel physical memory
+ * @pd: ibpd pointer
+ * @addr: physical address of memory to register
+ * @size: size of memory to register
+ * @acc: Access rights
+ * @iova_start: start of virtual address for physical buffers
+ */
+struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
+ u64 addr,
+ u64 size,
+ int acc,
+ u64 *iova_start)
+{
+ struct i40iw_pd *iwpd = to_iwpd(pd);
+ struct i40iw_device *iwdev = to_iwdev(pd->device);
+ struct i40iw_pbl *iwpbl;
+ struct i40iw_mr *iwmr;
+ enum i40iw_status_code status;
+ u32 stag;
+ u16 access = I40IW_ACCESS_FLAGS_LOCALREAD;
+ int ret;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IW_MEMREG_TYPE_MEM;
+ iwpbl->user_base = *iova_start;
+ stag = i40iw_create_stag(iwdev);
+ if (!stag) {
+ ret = -EOVERFLOW;
+ goto err;
+ }
+ access |= i40iw_get_user_access(acc);
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->page_cnt = 1;
+ iwmr->pgaddrmem[0] = addr;
+ status = i40iw_hwreg_mr(iwdev, iwmr, access);
+ if (status) {
+ i40iw_free_stag(iwdev, stag);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ i40iw_add_pdusecount(iwpd);
+ return &iwmr->ibmr;
+ err:
+ kfree(iwmr);
+ return ERR_PTR(ret);
+}
+
+/**
+ * i40iw_get_dma_mr - register physical mem
+ * @pd: ptr of pd
+ * @acc: access for memory
+ */
+static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ u64 kva = 0;
+
+ return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
+}
+
+/**
+ * i40iw_del_mem_list - Deleting pbl list entries for CQ/QP
+ * @iwmr: iwmr for IB's user page addresses
+ * @ucontext: ptr to user context
+ */
+static void i40iw_del_memlist(struct i40iw_mr *iwmr,
+ struct i40iw_ucontext *ucontext)
+{
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ unsigned long flags;
+
+ switch (iwmr->type) {
+ case IW_MEMREG_TYPE_CQ:
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ if (!list_empty(&ucontext->cq_reg_mem_list))
+ list_del(&iwpbl->list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ break;
+ case IW_MEMREG_TYPE_QP:
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ if (!list_empty(&ucontext->qp_reg_mem_list))
+ list_del(&iwpbl->list);
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * i40iw_dereg_mr - deregister mr
+ * @ib_mr: mr ptr for dereg
+ */
+static int i40iw_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct ib_pd *ibpd = ib_mr->pd;
+ struct i40iw_pd *iwpd = to_iwpd(ibpd);
+ struct i40iw_mr *iwmr = to_iwmr(ib_mr);
+ struct i40iw_device *iwdev = to_iwdev(ib_mr->device);
+ enum i40iw_status_code status;
+ struct i40iw_dealloc_stag_info *info;
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ u32 stag_idx;
+
+ if (iwmr->region)
+ ib_umem_release(iwmr->region);
+
+ if (iwmr->type != IW_MEMREG_TYPE_MEM) {
+ if (ibpd->uobject) {
+ struct i40iw_ucontext *ucontext;
+
+ ucontext = to_ucontext(ibpd->uobject->context);
+ i40iw_del_memlist(iwmr, ucontext);
+ }
+ if (iwpbl->pbl_allocated)
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ kfree(iwpbl->iwmr);
+ iwpbl->iwmr = NULL;
+ return 0;
+ }
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ memset(info, 0, sizeof(*info));
+
+ info->pd_id = cpu_to_le32(iwpd->sc_pd.pd_id & 0x00007fff);
+ info->stag_idx = RS_64_1(ib_mr->rkey, I40IW_CQPSQ_STAG_IDX_SHIFT);
+ stag_idx = info->stag_idx;
+ info->mr = true;
+ if (iwpbl->pbl_allocated)
+ info->dealloc_pbl = true;
+
+ cqp_info->cqp_cmd = OP_DEALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.dealloc_stag.dev = &iwdev->sc_dev;
+ cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP dealloc failed for stag_idx = 0x%x\n", stag_idx);
+ i40iw_rem_pdusecount(iwpd, iwdev);
+ i40iw_free_stag(iwdev, iwmr->stag);
+ if (iwpbl->pbl_allocated)
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ kfree(iwmr);
+ return 0;
+}
+
+/**
+ * i40iw_show_rev
+ */
+static ssize_t i40iw_show_rev(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i40iw_ib_device *iwibdev = container_of(dev,
+ struct i40iw_ib_device,
+ ibdev.dev);
+ u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
+
+ return sprintf(buf, "%x\n", hw_rev);
+}
+
+/**
+ * i40iw_show_fw_ver
+ */
+static ssize_t i40iw_show_fw_ver(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u32 firmware_version = I40IW_FW_VERSION;
+
+ return sprintf(buf, "%u.%u\n", firmware_version,
+ (firmware_version & 0x000000ff));
+}
+
+/**
+ * i40iw_show_hca
+ */
+static ssize_t i40iw_show_hca(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "I40IW\n");
+}
+
+/**
+ * i40iw_show_board
+ */
+static ssize_t i40iw_show_board(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
+
+static struct device_attribute *i40iw_dev_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+/**
+ * i40iw_copy_sg_list - copy sg list for qp
+ * @sg_list: copied into sg_list
+ * @sgl: copy from sgl
+ * @num_sges: count of sg entries
+ */
+static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, int num_sges)
+{
+ unsigned int i;
+
+ for (i = 0; (i < num_sges) && (i < I40IW_MAX_WQ_FRAGMENT_COUNT); i++) {
+ sg_list[i].tag_off = sgl[i].addr;
+ sg_list[i].len = sgl[i].length;
+ sg_list[i].stag = sgl[i].lkey;
+ }
+}
+
+/**
+ * i40iw_post_send - kernel application wr
+ * @ibqp: qp ptr for wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ */
+static int i40iw_post_send(struct ib_qp *ibqp,
+ struct ib_send_wr *ib_wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct i40iw_qp *iwqp;
+ struct i40iw_qp_uk *ukqp;
+ struct i40iw_post_sq_info info;
+ enum i40iw_status_code ret;
+ int err = 0;
+ unsigned long flags;
+
+ iwqp = (struct i40iw_qp *)ibqp;
+ ukqp = &iwqp->sc_qp.qp_uk;
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ while (ib_wr) {
+ memset(&info, 0, sizeof(info));
+ info.wr_id = (u64)(ib_wr->wr_id);
+ if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
+ info.signaled = true;
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ info.read_fence = true;
+
+ switch (ib_wr->opcode) {
+ case IB_WR_SEND:
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = I40IW_OP_TYPE_SEND_SOL;
+ else
+ info.op_type = I40IW_OP_TYPE_SEND;
+
+ if (ib_wr->send_flags & IB_SEND_INLINE) {
+ info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
+ info.op.inline_send.len = ib_wr->sg_list[0].length;
+ ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+ } else {
+ info.op.send.num_sges = ib_wr->num_sge;
+ info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
+ ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+ }
+
+ if (ret)
+ err = -EIO;
+ break;
+ case IB_WR_RDMA_WRITE:
+ info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
+
+ if (ib_wr->send_flags & IB_SEND_INLINE) {
+ info.op.inline_rdma_write.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
+ info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
+ info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+ info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+ info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
+ ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
+ } else {
+ info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
+ info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+ info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+ info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
+ ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
+ }
+
+ if (ret)
+ err = -EIO;
+ break;
+ case IB_WR_RDMA_READ:
+ info.op_type = I40IW_OP_TYPE_RDMA_READ;
+ info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+ info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
+ info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
+ info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
+ info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
+ ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false);
+ if (ret)
+ err = -EIO;
+ break;
+ default:
+ err = -EINVAL;
+ i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
+ ib_wr->opcode);
+ break;
+ }
+
+ if (err)
+ break;
+ ib_wr = ib_wr->next;
+ }
+
+ if (err)
+ *bad_wr = ib_wr;
+ else
+ ukqp->ops.iw_qp_post_wr(ukqp);
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ return err;
+}
+
+/**
+ * i40iw_post_recv - post receive wr for kernel application
+ * @ibqp: ib qp pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int i40iw_post_recv(struct ib_qp *ibqp,
+ struct ib_recv_wr *ib_wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct i40iw_qp *iwqp;
+ struct i40iw_qp_uk *ukqp;
+ struct i40iw_post_rq_info post_recv;
+ struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
+ enum i40iw_status_code ret = 0;
+ unsigned long flags;
+
+ iwqp = (struct i40iw_qp *)ibqp;
+ ukqp = &iwqp->sc_qp.qp_uk;
+
+ memset(&post_recv, 0, sizeof(post_recv));
+ spin_lock_irqsave(&iwqp->lock, flags);
+ while (ib_wr) {
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ i40iw_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
+ post_recv.sg_list = sg_list;
+ ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
+ if (ret) {
+ i40iw_pr_err(" post_recv err %d\n", ret);
+ *bad_wr = ib_wr;
+ goto out;
+ }
+ ib_wr = ib_wr->next;
+ }
+ out:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (ret)
+ return -ENOSYS;
+ return 0;
+}
+
+/**
+ * i40iw_poll_cq - poll cq for completion (kernel apps)
+ * @ibcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: wr of entry completed
+ */
+static int i40iw_poll_cq(struct ib_cq *ibcq,
+ int num_entries,
+ struct ib_wc *entry)
+{
+ struct i40iw_cq *iwcq;
+ int cqe_count = 0;
+ struct i40iw_cq_poll_info cq_poll_info;
+ enum i40iw_status_code ret;
+ struct i40iw_cq_uk *ukcq;
+ struct i40iw_sc_qp *qp;
+ unsigned long flags;
+
+ iwcq = (struct i40iw_cq *)ibcq;
+ ukcq = &iwcq->sc_cq.cq_uk;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ while (cqe_count < num_entries) {
+ ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
+ if (ret == I40IW_ERR_QUEUE_EMPTY) {
+ break;
+ } else if (ret) {
+ if (!cqe_count)
+ cqe_count = -1;
+ break;
+ }
+ entry->wc_flags = 0;
+ entry->wr_id = cq_poll_info.wr_id;
+ if (!cq_poll_info.error)
+ entry->status = IB_WC_SUCCESS;
+ else
+ entry->status = IB_WC_WR_FLUSH_ERR;
+
+ switch (cq_poll_info.op_type) {
+ case I40IW_OP_TYPE_RDMA_WRITE:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case I40IW_OP_TYPE_RDMA_READ_INV_STAG:
+ case I40IW_OP_TYPE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ break;
+ case I40IW_OP_TYPE_SEND_SOL:
+ case I40IW_OP_TYPE_SEND_SOL_INV:
+ case I40IW_OP_TYPE_SEND_INV:
+ case I40IW_OP_TYPE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case I40IW_OP_TYPE_REC:
+ entry->opcode = IB_WC_RECV;
+ break;
+ default:
+ entry->opcode = IB_WC_RECV;
+ break;
+ }
+
+ entry->vendor_err =
+ cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
+ entry->ex.imm_data = 0;
+ qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
+ entry->qp = (struct ib_qp *)qp->back_qp;
+ entry->src_qp = cq_poll_info.qp_id;
+ entry->byte_len = cq_poll_info.bytes_xfered;
+ entry++;
+ cqe_count++;
+ }
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+ return cqe_count;
+}
+
+/**
+ * i40iw_req_notify_cq - arm cq kernel application
+ * @ibcq: cq to arm
+ * @notify_flags: notofication flags
+ */
+static int i40iw_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags notify_flags)
+{
+ struct i40iw_cq *iwcq;
+ struct i40iw_cq_uk *ukcq;
+ enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+
+ iwcq = (struct i40iw_cq *)ibcq;
+ ukcq = &iwcq->sc_cq.cq_uk;
+ if (notify_flags == IB_CQ_NEXT_COMP)
+ cq_notify = IW_CQ_COMPL_EVENT;
+ ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+ return 0;
+}
+
+/**
+ * i40iw_port_immutable - return port's immutable data
+ * @ibdev: ib dev struct
+ * @port_num: port number
+ * @immutable: immutable data for the port return
+ */
+static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ err = i40iw_query_port(ibdev, port_num, &attr);
+
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ return 0;
+}
+
+/**
+ * i40iw_get_protocol_stats - Populates the rdma_stats structure
+ * @ibdev: ib dev struct
+ * @stats: iw protocol stats struct
+ */
+static int i40iw_get_protocol_stats(struct ib_device *ibdev,
+ union rdma_protocol_stats *stats)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+ struct timespec curr_time;
+ static struct timespec last_rd_time = {0, 0};
+ enum i40iw_status_code status = 0;
+ unsigned long flags;
+
+ curr_time = current_kernel_time();
+ memset(stats, 0, sizeof(*stats));
+
+ if (dev->is_pf) {
+ spin_lock_irqsave(&devstat->stats_lock, flags);
+ devstat->ops.iw_hw_stat_read_all(devstat,
+ &devstat->hw_stats);
+ spin_unlock_irqrestore(&devstat->stats_lock, flags);
+ } else {
+ if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
+ status = i40iw_vchnl_vf_get_pe_stats(dev,
+ &devstat->hw_stats);
+
+ if (status)
+ return -ENOSYS;
+ }
+
+ stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
+ hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS];
+ stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] +
+ hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC];
+ stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] +
+ hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD];
+ stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] +
+ hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE];
+ stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] +
+ hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS];
+ stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] +
+ hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS];
+ stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] +
+ hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS];
+ stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] +
+ hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS];
+ stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG];
+ stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS];
+ stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG];
+
+ last_rd_time = curr_time;
+ return 0;
+}
+
+/**
+ * i40iw_query_gid - Query port GID
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ */
+static int i40iw_query_gid(struct ib_device *ibdev,
+ u8 port,
+ int index,
+ union ib_gid *gid)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+
+ memset(gid->raw, 0, sizeof(gid->raw));
+ ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
+ return 0;
+}
+
+/**
+ * i40iw_modify_port Modify port properties
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @port_modify_mask: mask for port modifications
+ * @props: port properties
+ */
+static int i40iw_modify_port(struct ib_device *ibdev,
+ u8 port,
+ int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ return 0;
+}
+
+/**
+ * i40iw_query_pkey - Query partition key
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: index of pkey
+ * @pkey: pointer to store the pkey
+ */
+static int i40iw_query_pkey(struct ib_device *ibdev,
+ u8 port,
+ u16 index,
+ u16 *pkey)
+{
+ *pkey = 0;
+ return 0;
+}
+
+/**
+ * i40iw_create_ah - create address handle
+ * @ibpd: ptr of pd
+ * @ah_attr: address handle attributes
+ */
+static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
+ struct ib_ah_attr *attr)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+/**
+ * i40iw_destroy_ah - Destroy address handle
+ * @ah: pointer to address handle
+ */
+static int i40iw_destroy_ah(struct ib_ah *ah)
+{
+ return -ENOSYS;
+}
+
+/**
+ * i40iw_init_rdma_device - initialization of iwarp device
+ * @iwdev: iwarp device
+ */
+static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev)
+{
+ struct i40iw_ib_device *iwibdev;
+ struct net_device *netdev = iwdev->netdev;
+ struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
+
+ iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev));
+ if (!iwibdev) {
+ i40iw_pr_err("iwdev == NULL\n");
+ return NULL;
+ }
+ strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
+ iwibdev->ibdev.owner = THIS_MODULE;
+ iwdev->iwibdev = iwibdev;
+ iwibdev->iwdev = iwdev;
+
+ iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
+ ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
+
+ iwibdev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND);
+ iwibdev->ibdev.phys_port_cnt = 1;
+ iwibdev->ibdev.num_comp_vectors = 1;
+ iwibdev->ibdev.dma_device = &pcidev->dev;
+ iwibdev->ibdev.dev.parent = &pcidev->dev;
+ iwibdev->ibdev.query_port = i40iw_query_port;
+ iwibdev->ibdev.modify_port = i40iw_modify_port;
+ iwibdev->ibdev.query_pkey = i40iw_query_pkey;
+ iwibdev->ibdev.query_gid = i40iw_query_gid;
+ iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
+ iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext;
+ iwibdev->ibdev.mmap = i40iw_mmap;
+ iwibdev->ibdev.alloc_pd = i40iw_alloc_pd;
+ iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd;
+ iwibdev->ibdev.create_qp = i40iw_create_qp;
+ iwibdev->ibdev.modify_qp = i40iw_modify_qp;
+ iwibdev->ibdev.query_qp = i40iw_query_qp;
+ iwibdev->ibdev.destroy_qp = i40iw_destroy_qp;
+ iwibdev->ibdev.create_cq = i40iw_create_cq;
+ iwibdev->ibdev.destroy_cq = i40iw_destroy_cq;
+ iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
+ iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
+ iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
+ iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats;
+ iwibdev->ibdev.query_device = i40iw_query_device;
+ iwibdev->ibdev.create_ah = i40iw_create_ah;
+ iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
+ iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
+ if (!iwibdev->ibdev.iwcm) {
+ ib_dealloc_device(&iwibdev->ibdev);
+ i40iw_pr_err("iwcm == NULL\n");
+ return NULL;
+ }
+
+ iwibdev->ibdev.iwcm->add_ref = i40iw_add_ref;
+ iwibdev->ibdev.iwcm->rem_ref = i40iw_rem_ref;
+ iwibdev->ibdev.iwcm->get_qp = i40iw_get_qp;
+ iwibdev->ibdev.iwcm->connect = i40iw_connect;
+ iwibdev->ibdev.iwcm->accept = i40iw_accept;
+ iwibdev->ibdev.iwcm->reject = i40iw_reject;
+ iwibdev->ibdev.iwcm->create_listen = i40iw_create_listen;
+ iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen;
+ memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
+ sizeof(iwibdev->ibdev.iwcm->ifname));
+ iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
+ iwibdev->ibdev.poll_cq = i40iw_poll_cq;
+ iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
+ iwibdev->ibdev.post_send = i40iw_post_send;
+ iwibdev->ibdev.post_recv = i40iw_post_recv;
+
+ return iwibdev;
+}
+
+/**
+ * i40iw_port_ibevent - indicate port event
+ * @iwdev: iwarp device
+ */
+void i40iw_port_ibevent(struct i40iw_device *iwdev)
+{
+ struct i40iw_ib_device *iwibdev = iwdev->iwibdev;
+ struct ib_event event;
+
+ event.device = &iwibdev->ibdev;
+ event.element.port_num = 1;
+ event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&event);
+}
+
+/**
+ * i40iw_unregister_rdma_device - unregister of iwarp from IB
+ * @iwibdev: rdma device ptr
+ */
+static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
+ device_remove_file(&iwibdev->ibdev.dev,
+ i40iw_dev_attributes[i]);
+ ib_unregister_device(&iwibdev->ibdev);
+}
+
+/**
+ * i40iw_destroy_rdma_device - destroy rdma device and free resources
+ * @iwibdev: IB device ptr
+ */
+void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
+{
+ if (!iwibdev)
+ return;
+
+ i40iw_unregister_rdma_device(iwibdev);
+ kfree(iwibdev->ibdev.iwcm);
+ iwibdev->ibdev.iwcm = NULL;
+ ib_dealloc_device(&iwibdev->ibdev);
+}
+
+/**
+ * i40iw_register_rdma_device - register iwarp device to IB
+ * @iwdev: iwarp device
+ */
+int i40iw_register_rdma_device(struct i40iw_device *iwdev)
+{
+ int i, ret;
+ struct i40iw_ib_device *iwibdev;
+
+ iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
+ if (!iwdev->iwibdev)
+ return -ENOSYS;
+ iwibdev = iwdev->iwibdev;
+
+ ret = ib_register_device(&iwibdev->ibdev, NULL);
+ if (ret)
+ goto error;
+
+ for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
+ ret =
+ device_create_file(&iwibdev->ibdev.dev,
+ i40iw_dev_attributes[i]);
+ if (ret) {
+ while (i > 0) {
+ i--;
+ device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
+ }
+ ib_unregister_device(&iwibdev->ibdev);
+ goto error;
+ }
+ }
+ return 0;
+error:
+ kfree(iwdev->iwibdev->ibdev.iwcm);
+ iwdev->iwibdev->ibdev.iwcm = NULL;
+ ib_dealloc_device(&iwdev->iwibdev->ibdev);
+ return -ENOSYS;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
new file mode 100644
index 000000000000..1101f77080e6
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -0,0 +1,173 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VERBS_H
+#define I40IW_VERBS_H
+
+struct i40iw_ucontext {
+ struct ib_ucontext ibucontext;
+ struct i40iw_device *iwdev;
+ struct list_head cq_reg_mem_list;
+ spinlock_t cq_reg_mem_list_lock; /* memory list for cq's */
+ struct list_head qp_reg_mem_list;
+ spinlock_t qp_reg_mem_list_lock; /* memory list for qp's */
+};
+
+struct i40iw_pd {
+ struct ib_pd ibpd;
+ struct i40iw_sc_pd sc_pd;
+ atomic_t usecount;
+};
+
+struct i40iw_hmc_pble {
+ union {
+ u32 idx;
+ dma_addr_t addr;
+ };
+};
+
+struct i40iw_cq_mr {
+ struct i40iw_hmc_pble cq_pbl;
+ dma_addr_t shadow;
+};
+
+struct i40iw_qp_mr {
+ struct i40iw_hmc_pble sq_pbl;
+ struct i40iw_hmc_pble rq_pbl;
+ dma_addr_t shadow;
+ struct page *sq_page;
+};
+
+struct i40iw_pbl {
+ struct list_head list;
+ union {
+ struct i40iw_qp_mr qp_mr;
+ struct i40iw_cq_mr cq_mr;
+ };
+
+ bool pbl_allocated;
+ u64 user_base;
+ struct i40iw_pble_alloc pble_alloc;
+ struct i40iw_mr *iwmr;
+};
+
+#define MAX_SAVE_PAGE_ADDRS 4
+struct i40iw_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ struct ib_fmr ibfmr;
+ };
+ struct ib_umem *region;
+ u16 type;
+ u32 page_cnt;
+ u32 stag;
+ u64 length;
+ u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
+ struct i40iw_pbl iwpbl;
+};
+
+struct i40iw_cq {
+ struct ib_cq ibcq;
+ struct i40iw_sc_cq sc_cq;
+ u16 cq_head;
+ u16 cq_size;
+ u16 cq_number;
+ bool user_mode;
+ u32 polled_completions;
+ u32 cq_mem_size;
+ struct i40iw_dma_mem kmem;
+ spinlock_t lock; /* for poll cq */
+ struct i40iw_pbl *iwpbl;
+};
+
+struct disconn_work {
+ struct work_struct work;
+ struct i40iw_qp *iwqp;
+};
+
+struct iw_cm_id;
+struct ietf_mpa_frame;
+struct i40iw_ud_file;
+
+struct i40iw_qp_kmode {
+ struct i40iw_dma_mem dma_mem;
+ u64 *wrid_mem;
+};
+
+struct i40iw_qp {
+ struct ib_qp ibqp;
+ struct i40iw_sc_qp sc_qp;
+ struct i40iw_device *iwdev;
+ struct i40iw_cq *iwscq;
+ struct i40iw_cq *iwrcq;
+ struct i40iw_pd *iwpd;
+ struct i40iw_qp_host_ctx_info ctx_info;
+ struct i40iwarp_offload_info iwarp_info;
+ void *allocated_buffer;
+ atomic_t refcount;
+ struct iw_cm_id *cm_id;
+ void *cm_node;
+ struct ib_mr *lsmm_mr;
+ struct work_struct work;
+ enum ib_qp_state ibqp_state;
+ u32 iwarp_state;
+ u32 qp_mem_size;
+ u32 last_aeq;
+ atomic_t close_timer_started;
+ spinlock_t lock; /* for post work requests */
+ struct i40iw_qp_context *iwqp_context;
+ void *pbl_vbase;
+ dma_addr_t pbl_pbase;
+ struct page *page;
+ u8 active_conn:1;
+ u8 user_mode:1;
+ u8 hte_added:1;
+ u8 flush_issued:1;
+ u8 destroyed:1;
+ u8 sig_all:1;
+ u8 pau_mode:1;
+ u8 rsvd:1;
+ u16 term_sq_flush_code;
+ u16 term_rq_flush_code;
+ u8 hw_iwarp_state;
+ u8 hw_tcp_state;
+ struct i40iw_qp_kmode kqp;
+ struct i40iw_dma_mem host_ctx;
+ struct timer_list terminate_timer;
+ struct i40iw_pbl *iwpbl;
+ struct i40iw_dma_mem q2_ctx_mem;
+ struct i40iw_dma_mem ietf_mem;
+};
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
new file mode 100644
index 000000000000..cb0f18340e14
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c
@@ -0,0 +1,85 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+
+/**
+ * i40iw_manage_vf_pble_bp - manage vf pble
+ * @cqp: cqp for cqp' sq wqe
+ * @info: pble info
+ * @scratch: pointer for completion
+ * @post_sq: to post and ring
+ */
+enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
+ struct i40iw_manage_vf_pble_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 *wqe;
+ u64 temp, header, pd_pl_pba = 0;
+
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ temp = LS_64(info->pd_entry_cnt, I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT) |
+ LS_64(info->first_pd_index, I40IW_CQPSQ_MVPBP_FIRST_PD_INX) |
+ LS_64(info->sd_index, I40IW_CQPSQ_MVPBP_SD_INX);
+ set_64bit_val(wqe, 16, temp);
+
+ header = LS_64((info->inv_pd_ent ? 1 : 0), I40IW_CQPSQ_MVPBP_INV_PD_ENT) |
+ LS_64(I40IW_CQP_OP_MANAGE_VF_PBLE_BP, I40IW_CQPSQ_OPCODE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ set_64bit_val(wqe, 24, header);
+
+ pd_pl_pba = LS_64(info->pd_pl_pba >> 3, I40IW_CQPSQ_MVPBP_PD_PLPBA);
+ set_64bit_val(wqe, 32, pd_pl_pba);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE VF_PBLE_BP WQE", wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
+ i40iw_manage_vf_pble_bp
+};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
new file mode 100644
index 000000000000..f649f3a62e13
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h
@@ -0,0 +1,62 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VF_H
+#define I40IW_VF_H
+
+struct i40iw_sc_cqp;
+
+struct i40iw_manage_vf_pble_info {
+ u32 sd_index;
+ u16 first_pd_index;
+ u16 pd_entry_cnt;
+ u8 inv_pd_ent;
+ u64 pd_pl_pba;
+};
+
+struct i40iw_vf_cqp_ops {
+ enum i40iw_status_code (*manage_vf_pble_bp)(struct i40iw_sc_cqp *,
+ struct i40iw_manage_vf_pble_info *,
+ u64,
+ bool);
+};
+
+enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
+ struct i40iw_manage_vf_pble_info *info,
+ u64 scratch,
+ bool post_sq);
+
+extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
new file mode 100644
index 000000000000..6b68f7890b76
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -0,0 +1,748 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * vchnl_vf_send_get_ver_req - Request Channel version
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_ver_req(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_req *vchnl_req)
+{
+ enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+ if (!dev->vchnl_up)
+ return ret_code;
+
+ memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+ vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
+ vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_VER;
+ vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_VER_V0;
+ ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+ return ret_code;
+}
+
+/**
+ * vchnl_vf_send_get_hmc_fcn_req - Request HMC Function from VF
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_hmc_fcn_req(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_req *vchnl_req)
+{
+ enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+ if (!dev->vchnl_up)
+ return ret_code;
+
+ memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+ vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
+ vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_HMC_FCN;
+ vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_HMC_FCN_V0;
+ ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+ return ret_code;
+}
+
+/**
+ * vchnl_vf_send_get_pe_stats_req - Request PE stats from VF
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_req *vchnl_req)
+{
+ enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+ if (!dev->vchnl_up)
+ return ret_code;
+
+ memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+ vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_dev_hw_stats) - 1;
+ vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_STATS;
+ vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_STATS_V0;
+ ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+ return ret_code;
+}
+
+/**
+ * vchnl_vf_send_add_hmc_objs_req - Add HMC objects
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_req *vchnl_req,
+ enum i40iw_hmc_rsrc_type rsrc_type,
+ u32 start_index,
+ u32 rsrc_count)
+{
+ enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+ struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+
+ if (!dev->vchnl_up)
+ return ret_code;
+
+ add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+ memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+ memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
+ vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
+ vchnl_msg->iw_op_code = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE;
+ vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0;
+ add_hmc_obj->obj_type = (u16)rsrc_type;
+ add_hmc_obj->start_index = start_index;
+ add_hmc_obj->obj_count = rsrc_count;
+ ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+ return ret_code;
+}
+
+/**
+ * vchnl_vf_send_del_hmc_objs_req - del HMC objects
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ * @ rsrc_type - resource type to delete
+ * @ start_index - starting index for resource
+ * @ rsrc_count - number of resource type to delete
+ */
+static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev,
+ struct i40iw_virtchnl_req *vchnl_req,
+ enum i40iw_hmc_rsrc_type rsrc_type,
+ u32 start_index,
+ u32 rsrc_count)
+{
+ enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+ struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+
+ if (!dev->vchnl_up)
+ return ret_code;
+
+ add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+ memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+ memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
+ vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
+ vchnl_msg->iw_op_code = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE;
+ vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0;
+ add_hmc_obj->obj_type = (u16)rsrc_type;
+ add_hmc_obj->start_index = start_index;
+ add_hmc_obj->obj_count = rsrc_count;
+ ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+ return ret_code;
+}
+
+/**
+ * vchnl_pf_send_get_ver_resp - Send channel version to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ struct i40iw_virtchnl_op_buf *vchnl_msg)
+{
+ enum i40iw_status_code ret_code;
+ u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u32) - 1];
+ struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+ memset(resp_buffer, 0, sizeof(*resp_buffer));
+ vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+ vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+ vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+ *((u32 *)vchnl_msg_resp->iw_chnl_buf) = I40IW_VCHNL_CHNL_VER_V0;
+ ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_get_hmc_fcn_resp - Send HMC Function to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ struct i40iw_virtchnl_op_buf *vchnl_msg,
+ u16 hmc_fcn)
+{
+ enum i40iw_status_code ret_code;
+ u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u16) - 1];
+ struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+ memset(resp_buffer, 0, sizeof(*resp_buffer));
+ vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+ vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+ vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+ *((u16 *)vchnl_msg_resp->iw_chnl_buf) = hmc_fcn;
+ ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_get_pe_stats_resp - Send PE Stats to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ * @hw_stats: HW Stats struct
+ */
+
+static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ struct i40iw_virtchnl_op_buf *vchnl_msg,
+ struct i40iw_dev_hw_stats hw_stats)
+{
+ enum i40iw_status_code ret_code;
+ u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
+ struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+ memset(resp_buffer, 0, sizeof(*resp_buffer));
+ vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+ vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+ vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+ *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats;
+ ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_error_resp - Send an error response to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
+ struct i40iw_virtchnl_op_buf *vchnl_msg,
+ u16 op_ret_code)
+{
+ enum i40iw_status_code ret_code;
+ u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf)];
+ struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+ memset(resp_buffer, 0, sizeof(resp_buffer));
+ vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+ vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+ vchnl_msg_resp->iw_op_ret_code = (u16)op_ret_code;
+ ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn
+ * @cqp_req_param: CQP Request param value
+ * @not_used: unused CQP callback parameter
+ */
+static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param,
+ struct i40iw_ccq_cqe_info *cqe_info)
+{
+ struct i40iw_vfdev *vf_dev = callback_param;
+ struct i40iw_virt_mem vf_dev_mem;
+
+ if (cqe_info->error) {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "CQP Completion Error on Get HMC Function. Maj = 0x%04x, Minor = 0x%04x\n",
+ cqe_info->maj_err_code, cqe_info->min_err_code);
+ dev->vf_dev[vf_dev->iw_vf_idx] = NULL;
+ vchnl_pf_send_error_resp(dev, vf_dev->vf_id, &vf_dev->vf_msg_buffer.vchnl_msg,
+ (u16)I40IW_ERR_CQP_COMPL_ERROR);
+ vf_dev_mem.va = vf_dev;
+ vf_dev_mem.size = sizeof(*vf_dev);
+ i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "CQP Completion Operation Return information = 0x%08x\n",
+ cqe_info->op_ret_val);
+ vf_dev->pmf_index = (u16)cqe_info->op_ret_val;
+ vf_dev->msg_count--;
+ vchnl_pf_send_get_hmc_fcn_resp(dev,
+ vf_dev->vf_id,
+ &vf_dev->vf_msg_buffer.vchnl_msg,
+ vf_dev->pmf_index);
+ }
+}
+
+/**
+ * pf_add_hmc_obj - Callback for Add HMC Object
+ * @vf_dev: pointer to the VF Device
+ */
+static void pf_add_hmc_obj_callback(void *work_vf_dev)
+{
+ struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
+ struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
+ struct i40iw_hmc_create_obj_info info;
+ struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+ enum i40iw_status_code ret_code;
+
+ if (!vf_dev->pf_hmc_initialized) {
+ ret_code = i40iw_pf_init_vfhmc(vf_dev->pf_dev, (u8)vf_dev->pmf_index, NULL);
+ if (ret_code)
+ goto add_out;
+ vf_dev->pf_hmc_initialized = true;
+ }
+
+ add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+
+ memset(&info, 0, sizeof(info));
+ info.hmc_info = hmc_info;
+ info.is_pf = false;
+ info.rsrc_type = (u32)add_hmc_obj->obj_type;
+ info.entry_type = (info.rsrc_type == I40IW_HMC_IW_PBLE) ? I40IW_SD_TYPE_PAGED : I40IW_SD_TYPE_DIRECT;
+ info.start_idx = add_hmc_obj->start_index;
+ info.count = add_hmc_obj->obj_count;
+ i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
+ "I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE. Add %u type %u objects\n",
+ info.count, info.rsrc_type);
+ ret_code = i40iw_sc_create_hmc_obj(vf_dev->pf_dev, &info);
+ if (!ret_code)
+ vf_dev->hmc_info.hmc_obj[add_hmc_obj->obj_type].cnt = add_hmc_obj->obj_count;
+add_out:
+ vf_dev->msg_count--;
+ vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
+}
+
+/**
+ * pf_del_hmc_obj_callback - Callback for delete HMC Object
+ * @work_vf_dev: pointer to the VF Device
+ */
+static void pf_del_hmc_obj_callback(void *work_vf_dev)
+{
+ struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
+ struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
+ struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
+ struct i40iw_hmc_del_obj_info info;
+ struct i40iw_virtchnl_hmc_obj_range *del_hmc_obj;
+ enum i40iw_status_code ret_code = I40IW_SUCCESS;
+
+ if (!vf_dev->pf_hmc_initialized)
+ goto del_out;
+
+ del_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+
+ memset(&info, 0, sizeof(info));
+ info.hmc_info = hmc_info;
+ info.is_pf = false;
+ info.rsrc_type = (u32)del_hmc_obj->obj_type;
+ info.start_idx = del_hmc_obj->start_index;
+ info.count = del_hmc_obj->obj_count;
+ i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
+ "I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE. Delete %u type %u objects\n",
+ info.count, info.rsrc_type);
+ ret_code = i40iw_sc_del_hmc_obj(vf_dev->pf_dev, &info, false);
+del_out:
+ vf_dev->msg_count--;
+ vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
+}
+
+/**
+ * i40iw_vchnl_recv_pf - Receive PF virtual channel messages
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @msg: Virtual channel message buffer pointer
+ * @len: Length of the virtual channels message
+ */
+enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ u8 *msg,
+ u16 len)
+{
+ struct i40iw_virtchnl_op_buf *vchnl_msg = (struct i40iw_virtchnl_op_buf *)msg;
+ struct i40iw_vfdev *vf_dev = NULL;
+ struct i40iw_hmc_fcn_info hmc_fcn_info;
+ u16 iw_vf_idx;
+ u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
+ struct i40iw_virt_mem vf_dev_mem;
+ struct i40iw_virtchnl_work_info work_info;
+ struct i40iw_dev_pestat *devstat;
+ enum i40iw_status_code ret_code;
+ unsigned long flags;
+
+ if (!dev || !msg || !len)
+ return I40IW_ERR_PARAM;
+
+ if (!dev->vchnl_up)
+ return I40IW_ERR_NOT_READY;
+ if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
+ if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
+ vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+ else
+ vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+ return I40IW_SUCCESS;
+ }
+ for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT;
+ iw_vf_idx++) {
+ if (!dev->vf_dev[iw_vf_idx]) {
+ if (first_avail_iw_vf ==
+ I40IW_MAX_PE_ENABLED_VF_COUNT)
+ first_avail_iw_vf = iw_vf_idx;
+ continue;
+ }
+ if (dev->vf_dev[iw_vf_idx]->vf_id == vf_id) {
+ vf_dev = dev->vf_dev[iw_vf_idx];
+ break;
+ }
+ }
+ if (vf_dev) {
+ if (!vf_dev->msg_count) {
+ vf_dev->msg_count++;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "VF%u already has a channel message in progress.\n",
+ vf_id);
+ return I40IW_SUCCESS;
+ }
+ }
+ switch (vchnl_msg->iw_op_code) {
+ case I40IW_VCHNL_OP_GET_HMC_FCN:
+ if (!vf_dev &&
+ (first_avail_iw_vf != I40IW_MAX_PE_ENABLED_VF_COUNT)) {
+ ret_code = i40iw_allocate_virt_mem(dev->hw, &vf_dev_mem, sizeof(struct i40iw_vfdev) +
+ (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX));
+ if (!ret_code) {
+ vf_dev = vf_dev_mem.va;
+ vf_dev->stats_initialized = false;
+ vf_dev->pf_dev = dev;
+ vf_dev->msg_count = 1;
+ vf_dev->vf_id = vf_id;
+ vf_dev->iw_vf_idx = first_avail_iw_vf;
+ vf_dev->pf_hmc_initialized = false;
+ vf_dev->hmc_info.hmc_obj = (struct i40iw_hmc_obj_info *)(&vf_dev[1]);
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "vf_dev %p, hmc_info %p, hmc_obj %p\n",
+ vf_dev, &vf_dev->hmc_info, vf_dev->hmc_info.hmc_obj);
+ dev->vf_dev[first_avail_iw_vf] = vf_dev;
+ iw_vf_idx = first_avail_iw_vf;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "VF%u Unable to allocate a VF device structure.\n",
+ vf_id);
+ vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg, (u16)I40IW_ERR_NO_MEMORY);
+ return I40IW_SUCCESS;
+ }
+ memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+ hmc_fcn_info.callback_fcn = pf_cqp_get_hmc_fcn_callback;
+ hmc_fcn_info.vf_id = vf_id;
+ hmc_fcn_info.iw_vf_idx = vf_dev->iw_vf_idx;
+ hmc_fcn_info.cqp_callback_param = vf_dev;
+ hmc_fcn_info.free_fcn = false;
+ ret_code = i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "VF%u error CQP HMC Function operation.\n",
+ vf_id);
+ ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat);
+ if (ret_code)
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "VF%u - i40iw_device_init_pestat failed\n",
+ vf_id);
+ vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat,
+ (u8)vf_dev->pmf_index,
+ dev->hw, false);
+ vf_dev->stats_initialized = true;
+ } else {
+ if (vf_dev) {
+ vf_dev->msg_count--;
+ vchnl_pf_send_get_hmc_fcn_resp(dev, vf_id, vchnl_msg, vf_dev->pmf_index);
+ } else {
+ vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg,
+ (u16)I40IW_ERR_NO_MEMORY);
+ }
+ }
+ break;
+ case I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE:
+ if (!vf_dev)
+ return I40IW_ERR_BAD_PTR;
+ work_info.worker_vf_dev = vf_dev;
+ work_info.callback_fcn = pf_add_hmc_obj_callback;
+ memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+ i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
+ break;
+ case I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE:
+ if (!vf_dev)
+ return I40IW_ERR_BAD_PTR;
+ work_info.worker_vf_dev = vf_dev;
+ work_info.callback_fcn = pf_del_hmc_obj_callback;
+ memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+ i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
+ break;
+ case I40IW_VCHNL_OP_GET_STATS:
+ if (!vf_dev)
+ return I40IW_ERR_BAD_PTR;
+ devstat = &vf_dev->dev_pestat;
+ spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+ devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
+ spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+ vf_dev->msg_count--;
+ vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats);
+ break;
+ default:
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "40iw_vchnl_recv_pf: Invalid OpCode 0x%x\n",
+ vchnl_msg->iw_op_code);
+ vchnl_pf_send_error_resp(dev, vf_id,
+ vchnl_msg, (u16)I40IW_ERR_NOT_IMPLEMENTED);
+ }
+ return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vchnl_recv_vf - Receive VF virtual channel messages
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @msg: Virtual channel message buffer pointer
+ * @len: Length of the virtual channels message
+ */
+enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ u8 *msg,
+ u16 len)
+{
+ struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)msg;
+ struct i40iw_virtchnl_req *vchnl_req;
+
+ vchnl_req = (struct i40iw_virtchnl_req *)(uintptr_t)vchnl_msg_resp->iw_chnl_op_ctx;
+ vchnl_req->ret_code = (enum i40iw_status_code)vchnl_msg_resp->iw_op_ret_code;
+ if (len == (sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1)) {
+ if (vchnl_req->parm_len && vchnl_req->parm)
+ memcpy(vchnl_req->parm, vchnl_msg_resp->iw_chnl_buf, vchnl_req->parm_len);
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: Got response, data size %u\n", __func__,
+ vchnl_req->parm_len);
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s: error length on response, Got %u, expected %u\n", __func__,
+ len, (u32)(sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1));
+ }
+
+ return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vchnl_vf_get_ver - Request Channel version
+ * @dev: IWARP device pointer
+ * @vchnl_ver: Virtual channel message version pointer
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
+ u32 *vchnl_ver)
+{
+ struct i40iw_virtchnl_req vchnl_req;
+ enum i40iw_status_code ret_code;
+
+ memset(&vchnl_req, 0, sizeof(vchnl_req));
+ vchnl_req.dev = dev;
+ vchnl_req.parm = vchnl_ver;
+ vchnl_req.parm_len = sizeof(*vchnl_ver);
+ vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+ ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
+ if (!ret_code) {
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (!ret_code)
+ ret_code = vchnl_req.ret_code;
+ else
+ dev->vchnl_up = false;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s Send message failed 0x%0x\n", __func__, ret_code);
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_get_hmc_fcn - Request HMC Function
+ * @dev: IWARP device pointer
+ * @hmc_fcn: HMC function index pointer
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
+ u16 *hmc_fcn)
+{
+ struct i40iw_virtchnl_req vchnl_req;
+ enum i40iw_status_code ret_code;
+
+ memset(&vchnl_req, 0, sizeof(vchnl_req));
+ vchnl_req.dev = dev;
+ vchnl_req.parm = hmc_fcn;
+ vchnl_req.parm_len = sizeof(*hmc_fcn);
+ vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+ ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
+ if (!ret_code) {
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (!ret_code)
+ ret_code = vchnl_req.ret_code;
+ else
+ dev->vchnl_up = false;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s Send message failed 0x%0x\n", __func__, ret_code);
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_add_hmc_objs - Add HMC Object
+ * @dev: IWARP device pointer
+ * @rsrc_type: HMC Resource type
+ * @start_index: Starting index of the objects to be added
+ * @rsrc_count: Number of resources to be added
+ */
+enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
+ enum i40iw_hmc_rsrc_type rsrc_type,
+ u32 start_index,
+ u32 rsrc_count)
+{
+ struct i40iw_virtchnl_req vchnl_req;
+ enum i40iw_status_code ret_code;
+
+ memset(&vchnl_req, 0, sizeof(vchnl_req));
+ vchnl_req.dev = dev;
+ vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+ ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
+ &vchnl_req,
+ rsrc_type,
+ start_index,
+ rsrc_count);
+ if (!ret_code) {
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (!ret_code)
+ ret_code = vchnl_req.ret_code;
+ else
+ dev->vchnl_up = false;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s Send message failed 0x%0x\n", __func__, ret_code);
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_del_hmc_obj - del HMC obj
+ * @dev: IWARP device pointer
+ * @rsrc_type: HMC Resource type
+ * @start_index: Starting index of the object to delete
+ * @rsrc_count: Number of resources to be delete
+ */
+enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
+ enum i40iw_hmc_rsrc_type rsrc_type,
+ u32 start_index,
+ u32 rsrc_count)
+{
+ struct i40iw_virtchnl_req vchnl_req;
+ enum i40iw_status_code ret_code;
+
+ memset(&vchnl_req, 0, sizeof(vchnl_req));
+ vchnl_req.dev = dev;
+ vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+ ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
+ &vchnl_req,
+ rsrc_type,
+ start_index,
+ rsrc_count);
+ if (!ret_code) {
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (!ret_code)
+ ret_code = vchnl_req.ret_code;
+ else
+ dev->vchnl_up = false;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s Send message failed 0x%0x\n", __func__, ret_code);
+ }
+ return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_get_pe_stats - Get PE stats
+ * @dev: IWARP device pointer
+ * @hw_stats: HW stats struct
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
+ struct i40iw_dev_hw_stats *hw_stats)
+{
+ struct i40iw_virtchnl_req vchnl_req;
+ enum i40iw_status_code ret_code;
+
+ memset(&vchnl_req, 0, sizeof(vchnl_req));
+ vchnl_req.dev = dev;
+ vchnl_req.parm = hw_stats;
+ vchnl_req.parm_len = sizeof(*hw_stats);
+ vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+ ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
+ if (!ret_code) {
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (!ret_code)
+ ret_code = vchnl_req.ret_code;
+ else
+ dev->vchnl_up = false;
+ } else {
+ i40iw_debug(dev, I40IW_DEBUG_VIRT,
+ "%s Send message failed 0x%0x\n", __func__, ret_code);
+ }
+ return ret_code;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
new file mode 100644
index 000000000000..24886ef08293
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
@@ -0,0 +1,124 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VIRTCHNL_H
+#define I40IW_VIRTCHNL_H
+
+#include "i40iw_hmc.h"
+
+#pragma pack(push, 1)
+
+struct i40iw_virtchnl_op_buf {
+ u16 iw_op_code;
+ u16 iw_op_ver;
+ u16 iw_chnl_buf_len;
+ u16 rsvd;
+ u64 iw_chnl_op_ctx;
+ /* Member alignment MUST be maintained above this location */
+ u8 iw_chnl_buf[1];
+};
+
+struct i40iw_virtchnl_resp_buf {
+ u64 iw_chnl_op_ctx;
+ u16 iw_chnl_buf_len;
+ s16 iw_op_ret_code;
+ /* Member alignment MUST be maintained above this location */
+ u16 rsvd[2];
+ u8 iw_chnl_buf[1];
+};
+
+enum i40iw_virtchnl_ops {
+ I40IW_VCHNL_OP_GET_VER = 0,
+ I40IW_VCHNL_OP_GET_HMC_FCN,
+ I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE,
+ I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE,
+ I40IW_VCHNL_OP_GET_STATS
+};
+
+#define I40IW_VCHNL_OP_GET_VER_V0 0
+#define I40IW_VCHNL_OP_GET_HMC_FCN_V0 0
+#define I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0 0
+#define I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0 0
+#define I40IW_VCHNL_OP_GET_STATS_V0 0
+#define I40IW_VCHNL_CHNL_VER_V0 0
+
+struct i40iw_dev_hw_stats;
+
+struct i40iw_virtchnl_hmc_obj_range {
+ u16 obj_type;
+ u16 rsvd;
+ u32 start_index;
+ u32 obj_count;
+};
+
+enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ u8 *msg,
+ u16 len);
+
+enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
+ u32 vf_id,
+ u8 *msg,
+ u16 len);
+
+struct i40iw_virtchnl_req {
+ struct i40iw_sc_dev *dev;
+ struct i40iw_virtchnl_op_buf *vchnl_msg;
+ void *parm;
+ u32 vf_id;
+ u16 parm_len;
+ s16 ret_code;
+};
+
+#pragma pack(pop)
+
+enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
+ u32 *vchnl_ver);
+
+enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
+ u16 *hmc_fcn);
+
+enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
+ enum i40iw_hmc_rsrc_type rsrc_type,
+ u32 start_index,
+ u32 rsrc_count);
+
+enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
+ enum i40iw_hmc_rsrc_type rsrc_type,
+ u32 start_index,
+ u32 rsrc_count);
+
+enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
+ struct i40iw_dev_hw_stats *hw_stats);
+#endif
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 4e851889355a..7493a83acd28 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
new file mode 100644
index 000000000000..c1b9de800fe5
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_ib.h"
+
+static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
+{
+ switch (mlx_policy) {
+ case MLX5_POLICY_DOWN:
+ return IFLA_VF_LINK_STATE_DISABLE;
+ case MLX5_POLICY_UP:
+ return IFLA_VF_LINK_STATE_ENABLE;
+ case MLX5_POLICY_FOLLOW:
+ return IFLA_VF_LINK_STATE_AUTO;
+ default:
+ return __IFLA_VF_LINK_STATE_MAX;
+ }
+}
+
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *info)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(mdev, 1, 1, vf + 1, rep);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to query port policy for vf %d (%d)\n",
+ vf, err);
+ goto free;
+ }
+ memset(info, 0, sizeof(*info));
+ info->linkstate = mlx_to_net_policy(rep->policy);
+ if (info->linkstate == __IFLA_VF_LINK_STATE_MAX)
+ err = -EINVAL;
+
+free:
+ kfree(rep);
+ return err;
+}
+
+static inline enum port_state_policy net_to_mlx_policy(int policy)
+{
+ switch (policy) {
+ case IFLA_VF_LINK_STATE_DISABLE:
+ return MLX5_POLICY_DOWN;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ return MLX5_POLICY_UP;
+ case IFLA_VF_LINK_STATE_AUTO:
+ return MLX5_POLICY_FOLLOW;
+ default:
+ return MLX5_POLICY_INVALID;
+ }
+}
+
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+ u8 port, int state)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_hca_vport_context *in;
+ int err;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->policy = net_to_mlx_policy(state);
+ if (in->policy == MLX5_POLICY_INVALID) {
+ err = -EINVAL;
+ goto out;
+ }
+ in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
+ err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+
+out:
+ kfree(in);
+ return err;
+}
+
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+ u8 port, struct ifla_vf_stats *stats)
+{
+ int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ struct mlx5_core_dev *mdev;
+ struct mlx5_ib_dev *dev;
+ void *out;
+ int err;
+
+ dev = to_mdev(device);
+ mdev = dev->mdev;
+
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+ if (err)
+ goto ex;
+
+ stats->rx_packets = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.packets);
+ stats->tx_packets = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.packets);
+ stats->rx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.octets);
+ stats->tx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.octets);
+ stats->multicast = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_multicast.packets);
+
+ex:
+ kfree(out);
+ return err;
+}
+
+static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_hca_vport_context *in;
+ int err;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
+ in->node_guid = guid;
+ err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ kfree(in);
+ return err;
+}
+
+static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_hca_vport_context *in;
+ int err;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
+ in->port_guid = guid;
+ err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ kfree(in);
+ return err;
+}
+
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+ u64 guid, int type)
+{
+ if (type == IFLA_VF_IB_NODE_GUID)
+ return set_vf_node_guid(device, vf, port, guid);
+ else if (type == IFLA_VF_IB_PORT_GUID)
+ return set_vf_port_guid(device, vf, port, guid);
+
+ return -EINVAL;
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 41d8a0036465..1534af113058 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -208,7 +208,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
- err = mlx5_core_query_vport_counter(dev->mdev, 0,
+ err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
port_num, out_cnt, sz);
if (!err)
pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index edd8b8741846..5acf346e048e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -284,7 +284,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
- return !dev->mdev->issi;
+ return !MLX5_CAP_GEN(dev->mdev, ib_virt);
}
enum {
@@ -563,6 +563,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+ if (!mlx5_core_is_pf(mdev))
+ props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+
return 0;
}
@@ -700,6 +703,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->qkey_viol_cntr = rep->qkey_violation_counter;
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
+ props->grh_required = rep->grh_required;
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
@@ -2350,6 +2354,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ if (mlx5_core_is_pf(mdev)) {
+ dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
+ dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
+ dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
+ dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
+ }
mlx5_ib_internal_fill_odp_caps(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 76b2b42e0535..f16c818ad2e6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -776,6 +776,14 @@ void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end);
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
+ u8 port, struct ifla_vf_info *info);
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+ u8 port, int state);
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+ u8 port, struct ifla_vf_stats *stats);
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+ u64 guid, int type);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 495be09781b1..e0fdb9201423 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_QIB
tristate "Intel PCIe HCA support"
- depends on 64BIT
+ depends on 64BIT && INFINIBAND_RDMAVT
---help---
This is a low-level driver for Intel PCIe QLE InfiniBand host
channel adapters. This driver does not support the Intel
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index 57f8103e51f8..79ebd79e8405 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -1,11 +1,11 @@
obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o
-ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
- qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \
- qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \
- qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
+ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \
+ qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \
+ qib_mad.o qib_pcie.o qib_pio_copy.o \
+ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o \
qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
- qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
+ qib_user_pages.o qib_user_sdma.o qib_iba7220.o \
qib_sd7220.o qib_iba7322.o qib_verbs.o
# 6120 has no fallback if no MSI interrupts, others can do INTx
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 7df16f74bb45..bbf0a163aeab 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -52,6 +52,7 @@
#include <linux/kref.h>
#include <linux/sched.h>
#include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
#include "qib_common.h"
#include "qib_verbs.h"
@@ -229,9 +230,6 @@ struct qib_ctxtdata {
u8 redirect_seq_cnt;
/* ctxt rcvhdrq head offset */
u32 head;
- /* lookaside fields */
- struct qib_qp *lookaside_qp;
- u32 lookaside_qpn;
/* QPs waiting for context processing */
struct list_head qp_wait_list;
#ifdef CONFIG_DEBUG_FS
@@ -240,7 +238,7 @@ struct qib_ctxtdata {
#endif
};
-struct qib_sge_state;
+struct rvt_sge_state;
struct qib_sdma_txreq {
int flags;
@@ -258,14 +256,14 @@ struct qib_sdma_desc {
struct qib_verbs_txreq {
struct qib_sdma_txreq txreq;
- struct qib_qp *qp;
- struct qib_swqe *wqe;
+ struct rvt_qp *qp;
+ struct rvt_swqe *wqe;
u32 dwords;
u16 hdr_dwords;
u16 hdr_inx;
struct qib_pio_header *align_buf;
- struct qib_mregion *mr;
- struct qib_sge_state *ss;
+ struct rvt_mregion *mr;
+ struct rvt_sge_state *ss;
};
#define QIB_SDMA_TXREQ_F_USELARGEBUF 0x1
@@ -1096,8 +1094,6 @@ struct qib_devdata {
u16 psxmitwait_check_rate;
/* high volume overflow errors defered to tasklet */
struct tasklet_struct error_tasklet;
- /* per device cq worker */
- struct kthread_worker *worker;
int assigned_node_id; /* NUMA node closest to HCA */
};
@@ -1135,8 +1131,9 @@ extern spinlock_t qib_devs_lock;
extern struct qib_devdata *qib_lookup(int unit);
extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist;
-
+extern u16 qpt_mask;
extern unsigned qib_cc_table_size;
+
int qib_init(struct qib_devdata *, int);
int init_chip_wc_pat(struct qib_devdata *dd, u32);
int qib_enable_wc(struct qib_devdata *dd);
@@ -1323,7 +1320,7 @@ void __qib_sdma_intr(struct qib_pportdata *);
void qib_sdma_intr(struct qib_pportdata *);
void qib_user_sdma_send_desc(struct qib_pportdata *dd,
struct list_head *pktlist);
-int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
+int qib_sdma_verbs_send(struct qib_pportdata *, struct rvt_sge_state *,
u32, struct qib_verbs_txreq *);
/* ppd->sdma_lock should be locked before calling this. */
int qib_sdma_make_progress(struct qib_pportdata *dd);
@@ -1454,6 +1451,8 @@ u64 qib_sps_ints(void);
dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
size_t, int);
const char *qib_get_unit_name(int unit);
+const char *qib_get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
/*
* Flush write combining store buffers (if present) and perform a write
@@ -1540,4 +1539,14 @@ struct qib_hwerror_msgs {
void qib_format_hwerrors(u64 hwerrs,
const struct qib_hwerror_msgs *hwerrmsgs,
size_t nhwerrmsgs, char *msg, size_t lmsg);
+
+void qib_stop_send_queue(struct rvt_qp *qp);
+void qib_quiesce_qp(struct rvt_qp *qp);
+void qib_flush_qp_waiters(struct rvt_qp *qp);
+int qib_mtu_to_path_mtu(u32 mtu);
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+void qib_notify_error_qp(struct rvt_qp *qp);
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_attr *attr);
+
#endif /* _QIB_KERNEL_H */
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 4fb78abd8ba1..1d6e63eb1146 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -742,14 +742,11 @@ struct qib_tid_session_member {
#define SIZE_OF_CRC 1
#define QIB_DEFAULT_P_KEY 0xFFFF
-#define QIB_PERMISSIVE_LID 0xFFFF
#define QIB_AETH_CREDIT_SHIFT 24
#define QIB_AETH_CREDIT_MASK 0x1F
#define QIB_AETH_CREDIT_INVAL 0x1F
#define QIB_PSN_MASK 0xFFFFFF
#define QIB_MSN_MASK 0xFFFFFF
-#define QIB_QPN_MASK 0xFFFFFF
-#define QIB_MULTICAST_LID_BASE 0xC000
#define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
#define QIB_MULTICAST_QPN 0xFFFFFF
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
deleted file mode 100644
index 2b45d0b02300..000000000000
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * Copyright (c) 2013 Intel Corporation. All rights reserved.
- * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/kthread.h>
-
-#include "qib_verbs.h"
-#include "qib.h"
-
-/**
- * qib_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
-{
- struct qib_cq_wc *wc;
- unsigned long flags;
- u32 head;
- u32 next;
-
- spin_lock_irqsave(&cq->lock, flags);
-
- /*
- * Note that the head pointer might be writable by user processes.
- * Take care to verify it is a sane value.
- */
- wc = cq->queue;
- head = wc->head;
- if (head >= (unsigned) cq->ibcq.cqe) {
- head = cq->ibcq.cqe;
- next = 0;
- } else
- next = head + 1;
- if (unlikely(next == wc->tail)) {
- spin_unlock_irqrestore(&cq->lock, flags);
- if (cq->ibcq.event_handler) {
- struct ib_event ev;
-
- ev.device = cq->ibcq.device;
- ev.element.cq = &cq->ibcq;
- ev.event = IB_EVENT_CQ_ERR;
- cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
- }
- return;
- }
- if (cq->ip) {
- wc->uqueue[head].wr_id = entry->wr_id;
- wc->uqueue[head].status = entry->status;
- wc->uqueue[head].opcode = entry->opcode;
- wc->uqueue[head].vendor_err = entry->vendor_err;
- wc->uqueue[head].byte_len = entry->byte_len;
- wc->uqueue[head].ex.imm_data =
- (__u32 __force)entry->ex.imm_data;
- wc->uqueue[head].qp_num = entry->qp->qp_num;
- wc->uqueue[head].src_qp = entry->src_qp;
- wc->uqueue[head].wc_flags = entry->wc_flags;
- wc->uqueue[head].pkey_index = entry->pkey_index;
- wc->uqueue[head].slid = entry->slid;
- wc->uqueue[head].sl = entry->sl;
- wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
- wc->uqueue[head].port_num = entry->port_num;
- /* Make sure entry is written before the head index. */
- smp_wmb();
- } else
- wc->kqueue[head] = *entry;
- wc->head = next;
-
- if (cq->notify == IB_CQ_NEXT_COMP ||
- (cq->notify == IB_CQ_SOLICITED &&
- (solicited || entry->status != IB_WC_SUCCESS))) {
- struct kthread_worker *worker;
- /*
- * This will cause send_complete() to be called in
- * another thread.
- */
- smp_rmb();
- worker = cq->dd->worker;
- if (likely(worker)) {
- cq->notify = IB_CQ_NONE;
- cq->triggered++;
- queue_kthread_work(worker, &cq->comptask);
- }
- }
-
- spin_unlock_irqrestore(&cq->lock, flags);
-}
-
-/**
- * qib_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context. Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
- struct qib_cq *cq = to_icq(ibcq);
- struct qib_cq_wc *wc;
- unsigned long flags;
- int npolled;
- u32 tail;
-
- /* The kernel can only poll a kernel completion queue */
- if (cq->ip) {
- npolled = -EINVAL;
- goto bail;
- }
-
- spin_lock_irqsave(&cq->lock, flags);
-
- wc = cq->queue;
- tail = wc->tail;
- if (tail > (u32) cq->ibcq.cqe)
- tail = (u32) cq->ibcq.cqe;
- for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
- if (tail == wc->head)
- break;
- /* The kernel doesn't need a RMB since it has the lock. */
- *entry = wc->kqueue[tail];
- if (tail >= cq->ibcq.cqe)
- tail = 0;
- else
- tail++;
- }
- wc->tail = tail;
-
- spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
- return npolled;
-}
-
-static void send_complete(struct kthread_work *work)
-{
- struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
-
- /*
- * The completion handler will most likely rearm the notification
- * and poll for all pending entries. If a new completion entry
- * is added while we are in this routine, queue_work()
- * won't call us again until we return so we check triggered to
- * see if we need to call the handler again.
- */
- for (;;) {
- u8 triggered = cq->triggered;
-
- /*
- * IPoIB connected mode assumes the callback is from a
- * soft IRQ. We simulate this by blocking "bottom halves".
- * See the implementation for ipoib_cm_handle_tx_wc(),
- * netif_tx_lock_bh() and netif_tx_lock().
- */
- local_bh_disable();
- cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
- local_bh_enable();
-
- if (cq->triggered == triggered)
- return;
- }
-}
-
-/**
- * qib_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the QLogic_IB driver
- * @udata: user data for libibverbs.so
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- int entries = attr->cqe;
- struct qib_ibdev *dev = to_idev(ibdev);
- struct qib_cq *cq;
- struct qib_cq_wc *wc;
- struct ib_cq *ret;
- u32 sz;
-
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- if (entries < 1 || entries > ib_qib_max_cqes) {
- ret = ERR_PTR(-EINVAL);
- goto done;
- }
-
- /* Allocate the completion queue structure. */
- cq = kmalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
- ret = ERR_PTR(-ENOMEM);
- goto done;
- }
-
- /*
- * Allocate the completion queue entries and head/tail pointers.
- * This is allocated separately so that it can be resized and
- * also mapped into user space.
- * We need to use vmalloc() in order to support mmap and large
- * numbers of entries.
- */
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
- else
- sz += sizeof(struct ib_wc) * (entries + 1);
- wc = vmalloc_user(sz);
- if (!wc) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_cq;
- }
-
- /*
- * Return the address of the WC as the offset to mmap.
- * See qib_mmap() for details.
- */
- if (udata && udata->outlen >= sizeof(__u64)) {
- int err;
-
- cq->ip = qib_create_mmap_info(dev, sz, context, wc);
- if (!cq->ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_wc;
- }
-
- err = ib_copy_to_udata(udata, &cq->ip->offset,
- sizeof(cq->ip->offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_ip;
- }
- } else
- cq->ip = NULL;
-
- spin_lock(&dev->n_cqs_lock);
- if (dev->n_cqs_allocated == ib_qib_max_cqs) {
- spin_unlock(&dev->n_cqs_lock);
- ret = ERR_PTR(-ENOMEM);
- goto bail_ip;
- }
-
- dev->n_cqs_allocated++;
- spin_unlock(&dev->n_cqs_lock);
-
- if (cq->ip) {
- spin_lock_irq(&dev->pending_lock);
- list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
- }
-
- /*
- * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
- * The number of entries should be >= the number requested or return
- * an error.
- */
- cq->dd = dd_from_dev(dev);
- cq->ibcq.cqe = entries;
- cq->notify = IB_CQ_NONE;
- cq->triggered = 0;
- spin_lock_init(&cq->lock);
- init_kthread_work(&cq->comptask, send_complete);
- wc->head = 0;
- wc->tail = 0;
- cq->queue = wc;
-
- ret = &cq->ibcq;
-
- goto done;
-
-bail_ip:
- kfree(cq->ip);
-bail_wc:
- vfree(wc);
-bail_cq:
- kfree(cq);
-done:
- return ret;
-}
-
-/**
- * qib_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int qib_destroy_cq(struct ib_cq *ibcq)
-{
- struct qib_ibdev *dev = to_idev(ibcq->device);
- struct qib_cq *cq = to_icq(ibcq);
-
- flush_kthread_work(&cq->comptask);
- spin_lock(&dev->n_cqs_lock);
- dev->n_cqs_allocated--;
- spin_unlock(&dev->n_cqs_lock);
- if (cq->ip)
- kref_put(&cq->ip->ref, qib_release_mmap_info);
- else
- vfree(cq->queue);
- kfree(cq);
-
- return 0;
-}
-
-/**
- * qib_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context. Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
- struct qib_cq *cq = to_icq(ibcq);
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&cq->lock, flags);
- /*
- * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
- * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
- */
- if (cq->notify != IB_CQ_NEXT_COMP)
- cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
- if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
- cq->queue->head != cq->queue->tail)
- ret = 1;
-
- spin_unlock_irqrestore(&cq->lock, flags);
-
- return ret;
-}
-
-/**
- * qib_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
- struct qib_cq *cq = to_icq(ibcq);
- struct qib_cq_wc *old_wc;
- struct qib_cq_wc *wc;
- u32 head, tail, n;
- int ret;
- u32 sz;
-
- if (cqe < 1 || cqe > ib_qib_max_cqes) {
- ret = -EINVAL;
- goto bail;
- }
-
- /*
- * Need to use vmalloc() if we want to support large #s of entries.
- */
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
- else
- sz += sizeof(struct ib_wc) * (cqe + 1);
- wc = vmalloc_user(sz);
- if (!wc) {
- ret = -ENOMEM;
- goto bail;
- }
-
- /* Check that we can write the offset to mmap. */
- if (udata && udata->outlen >= sizeof(__u64)) {
- __u64 offset = 0;
-
- ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (ret)
- goto bail_free;
- }
-
- spin_lock_irq(&cq->lock);
- /*
- * Make sure head and tail are sane since they
- * might be user writable.
- */
- old_wc = cq->queue;
- head = old_wc->head;
- if (head > (u32) cq->ibcq.cqe)
- head = (u32) cq->ibcq.cqe;
- tail = old_wc->tail;
- if (tail > (u32) cq->ibcq.cqe)
- tail = (u32) cq->ibcq.cqe;
- if (head < tail)
- n = cq->ibcq.cqe + 1 + head - tail;
- else
- n = head - tail;
- if (unlikely((u32)cqe < n)) {
- ret = -EINVAL;
- goto bail_unlock;
- }
- for (n = 0; tail != head; n++) {
- if (cq->ip)
- wc->uqueue[n] = old_wc->uqueue[tail];
- else
- wc->kqueue[n] = old_wc->kqueue[tail];
- if (tail == (u32) cq->ibcq.cqe)
- tail = 0;
- else
- tail++;
- }
- cq->ibcq.cqe = cqe;
- wc->head = n;
- wc->tail = 0;
- cq->queue = wc;
- spin_unlock_irq(&cq->lock);
-
- vfree(old_wc);
-
- if (cq->ip) {
- struct qib_ibdev *dev = to_idev(ibcq->device);
- struct qib_mmap_info *ip = cq->ip;
-
- qib_update_mmap_info(dev, ip, sz, wc);
-
- /*
- * Return the offset to mmap.
- * See qib_mmap() for details.
- */
- if (udata && udata->outlen >= sizeof(__u64)) {
- ret = ib_copy_to_udata(udata, &ip->offset,
- sizeof(ip->offset));
- if (ret)
- goto bail;
- }
-
- spin_lock_irq(&dev->pending_lock);
- if (list_empty(&ip->pending_mmaps))
- list_add(&ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
- }
-
- ret = 0;
- goto bail;
-
-bail_unlock:
- spin_unlock_irq(&cq->lock);
-bail_free:
- vfree(wc);
-bail:
- return ret;
-}
-
-int qib_cq_init(struct qib_devdata *dd)
-{
- int ret = 0;
- int cpu;
- struct task_struct *task;
-
- if (dd->worker)
- return 0;
- dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
- if (!dd->worker)
- return -ENOMEM;
- init_kthread_worker(dd->worker);
- task = kthread_create_on_node(
- kthread_worker_fn,
- dd->worker,
- dd->assigned_node_id,
- "qib_cq%d", dd->unit);
- if (IS_ERR(task))
- goto task_fail;
- cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
- kthread_bind(task, cpu);
- wake_up_process(task);
-out:
- return ret;
-task_fail:
- ret = PTR_ERR(task);
- kfree(dd->worker);
- dd->worker = NULL;
- goto out;
-}
-
-void qib_cq_exit(struct qib_devdata *dd)
-{
- struct kthread_worker *worker;
-
- worker = dd->worker;
- if (!worker)
- return;
- /* blocks future queuing from send_complete() */
- dd->worker = NULL;
- smp_wmb();
- flush_kthread_worker(worker);
- kthread_stop(worker->task);
- kfree(worker);
-}
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index f58fdc3d25a2..67ee6438cf59 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -90,6 +90,22 @@ const char *qib_get_unit_name(int unit)
return iname;
}
+const char *qib_get_card_name(struct rvt_dev_info *rdi)
+{
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(ibdev,
+ struct qib_devdata, verbs_dev);
+ return qib_get_unit_name(dd->unit);
+}
+
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
+{
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(ibdev,
+ struct qib_devdata, verbs_dev);
+ return dd->pcidev;
+}
+
/*
* Return count of units with at least one port ACTIVE.
*/
@@ -306,7 +322,9 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
struct qib_other_headers *ohdr = NULL;
struct qib_ibport *ibp = &ppd->ibport_data;
- struct qib_qp *qp = NULL;
+ struct qib_devdata *dd = ppd->dd;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct rvt_qp *qp = NULL;
u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
u16 lid = be16_to_cpu(hdr->lrh[1]);
int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -319,7 +337,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
if (tlen < 24)
goto drop;
- if (lid < QIB_MULTICAST_LID_BASE) {
+ if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
lid &= ~((1 << ppd->lmc) - 1);
if (unlikely(lid != ppd->lid))
goto drop;
@@ -346,13 +364,16 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
psn = be32_to_cpu(ohdr->bth[2]);
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
if (qp_num != QIB_MULTICAST_QPN) {
int ruc_res;
- qp = qib_lookup_qpn(ibp, qp_num);
- if (!qp)
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!qp) {
+ rcu_read_unlock();
goto drop;
+ }
/*
* Handle only RC QPs - for other QP types drop error
@@ -361,9 +382,9 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
spin_lock(&qp->r_lock);
/* Check for valid receive state. */
- if (!(ib_qib_state_ops[qp->state] &
- QIB_PROCESS_RECV_OK)) {
- ibp->n_pkt_drops++;
+ if (!(ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_RECV_OK)) {
+ ibp->rvp.n_pkt_drops++;
goto unlock;
}
@@ -383,7 +404,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff = qib_cmp24(psn, qp->r_psn);
if (!qp->r_nak_state && diff >= 0) {
- ibp->n_rc_seqnak++;
+ ibp->rvp.n_rc_seqnak++;
qp->r_nak_state =
IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
@@ -398,7 +419,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
*/
if (list_empty(&qp->rspwait)) {
qp->r_flags |=
- QIB_R_RSP_NAK;
+ RVT_R_RSP_NAK;
atomic_inc(
&qp->refcount);
list_add_tail(
@@ -419,12 +440,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
unlock:
spin_unlock(&qp->r_lock);
- /*
- * Notify qib_destroy_qp() if it is waiting
- * for us to finish.
- */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rcu_read_unlock();
} /* Unicast QP */
} /* Valid packet with TIDErr */
@@ -456,7 +472,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
int last;
u64 lval;
- struct qib_qp *qp, *nqp;
+ struct rvt_qp *qp, *nqp;
l = rcd->head;
rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
@@ -549,15 +565,6 @@ move_along:
updegr = 0;
}
}
- /*
- * Notify qib_destroy_qp() if it is waiting
- * for lookaside_qp to finish.
- */
- if (rcd->lookaside_qp) {
- if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
- wake_up(&rcd->lookaside_qp->wait);
- rcd->lookaside_qp = NULL;
- }
rcd->head = l;
@@ -567,17 +574,17 @@ move_along:
*/
list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
list_del_init(&qp->rspwait);
- if (qp->r_flags & QIB_R_RSP_NAK) {
- qp->r_flags &= ~QIB_R_RSP_NAK;
+ if (qp->r_flags & RVT_R_RSP_NAK) {
+ qp->r_flags &= ~RVT_R_RSP_NAK;
qib_send_rc_ack(qp);
}
- if (qp->r_flags & QIB_R_RSP_SEND) {
+ if (qp->r_flags & RVT_R_RSP_SEND) {
unsigned long flags;
- qp->r_flags &= ~QIB_R_RSP_SEND;
+ qp->r_flags &= ~RVT_R_RSP_SEND;
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_qib_state_ops[qp->state] &
- QIB_PROCESS_OR_FLUSH_SEND)
+ if (ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_OR_FLUSH_SEND)
qib_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 4b927809d1a1..a3733f25280f 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2956,13 +2956,13 @@ static void pma_6120_timer(unsigned long data)
struct qib_ibport *ibp = &ppd->ibport_data;
unsigned long flags;
- spin_lock_irqsave(&ibp->lock, flags);
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) {
cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
qib_snapshot_counters(ppd, &cs->sword, &cs->rword,
&cs->spkts, &cs->rpkts, &cs->xmit_wait);
mod_timer(&cs->pma_timer,
- jiffies + usecs_to_jiffies(ibp->pma_sample_interval));
+ jiffies + usecs_to_jiffies(ibp->rvp.pma_sample_interval));
} else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
u64 ta, tb, tc, td, te;
@@ -2975,11 +2975,11 @@ static void pma_6120_timer(unsigned long data)
cs->rpkts = td - cs->rpkts;
cs->xmit_wait = te - cs->xmit_wait;
}
- spin_unlock_irqrestore(&ibp->lock, flags);
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
}
/*
- * Note that the caller has the ibp->lock held.
+ * Note that the caller has the ibp->rvp.lock held.
*/
static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv,
u32 start)
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 6c8ff10101c0..82d7c4bf5970 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2910,8 +2910,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
}
- if (dd->pport[i].ibport_data.smi_ah)
- ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah);
}
}
@@ -5497,7 +5495,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
unsigned delay;
int ret;
- agent = ibp->send_agent;
+ agent = ibp->rvp.send_agent;
if (!agent)
goto retry;
@@ -5515,7 +5513,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
ret = PTR_ERR(ah);
else {
send_buf->ah = ah;
- ibp->smi_ah = to_iah(ah);
+ ibp->smi_ah = ibah_to_rvtah(ah);
ret = 0;
}
} else {
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4ff340fe904f..3f062f0dd9d8 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -42,6 +42,7 @@
#ifdef CONFIG_INFINIBAND_QIB_DCA
#include <linux/dca.h>
#endif
+#include <rdma/rdma_vt.h>
#include "qib.h"
#include "qib_common.h"
@@ -244,6 +245,13 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
alloc_percpu(struct qib_pma_counters);
if (!ppd->ibport_data.pmastats)
return -ENOMEM;
+ ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+ ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+ ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+ if (!(ppd->ibport_data.rvp.rc_acks) ||
+ !(ppd->ibport_data.rvp.rc_qacks) ||
+ !(ppd->ibport_data.rvp.rc_delayed_comp))
+ return -ENOMEM;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
goto bail;
@@ -449,8 +457,6 @@ static int loadtime_init(struct qib_devdata *dd)
init_timer(&dd->intrchk_timer);
dd->intrchk_timer.function = verify_interrupt;
dd->intrchk_timer.data = (unsigned long) dd;
-
- ret = qib_cq_init(dd);
done:
return ret;
}
@@ -631,6 +637,9 @@ wq_error:
static void qib_free_pportdata(struct qib_pportdata *ppd)
{
free_percpu(ppd->ibport_data.pmastats);
+ free_percpu(ppd->ibport_data.rvp.rc_acks);
+ free_percpu(ppd->ibport_data.rvp.rc_qacks);
+ free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
ppd->ibport_data.pmastats = NULL;
}
@@ -1081,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd)
qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
free_percpu(dd->int_counter);
- ib_dealloc_device(&dd->verbs_dev.ibdev);
+ ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
}
u64 qib_int_counter(struct qib_devdata *dd)
@@ -1120,9 +1129,12 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
{
unsigned long flags;
struct qib_devdata *dd;
- int ret;
+ int ret, nports;
- dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
+ /* extra is * number of ports */
+ nports = extra / sizeof(struct qib_pportdata);
+ dd = (struct qib_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+ nports);
if (!dd)
return ERR_PTR(-ENOMEM);
@@ -1171,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
bail:
if (!list_empty(&dd->list))
list_del_init(&dd->list);
- ib_dealloc_device(&dd->verbs_dev.ibdev);
+ ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
return ERR_PTR(ret);
}
@@ -1421,7 +1433,6 @@ static void cleanup_device_data(struct qib_devdata *dd)
}
kfree(tmp);
kfree(dd->boardname);
- qib_cq_exit(dd);
}
/*
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 086616d071b9..a014fd4cd076 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -74,7 +74,7 @@ static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev)
struct ib_event event;
struct qib_devdata *dd = ppd->dd;
- event.device = &dd->verbs_dev.ibdev;
+ event.device = &dd->verbs_dev.rdi.ibdev;
event.element.port_num = ppd->port;
event.event = ev;
ib_dispatch_event(&event);
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index d725c565518d..2c3c93572c17 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -46,20 +46,20 @@
*
*/
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
+int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
unsigned long flags;
u32 r;
u32 n;
int ret = 0;
struct qib_ibdev *dev = to_idev(mr->pd->device);
- struct qib_lkey_table *rkt = &dev->lk_table;
+ struct rvt_lkey_table *rkt = &dev->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
/* special case for dma_mr lkey == 0 */
if (dma_region) {
- struct qib_mregion *tmr;
+ struct rvt_mregion *tmr;
tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
@@ -90,8 +90,8 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
* bits are capped in qib_verbs.c to insure enough bits
* for generation number
*/
- mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
- ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
+ mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) |
+ ((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen)
<< 8);
if (mr->lkey == 0) {
mr->lkey |= 1 << 8;
@@ -114,13 +114,13 @@ bail:
* qib_free_lkey - free an lkey
* @mr: mr to free from tables
*/
-void qib_free_lkey(struct qib_mregion *mr)
+void qib_free_lkey(struct rvt_mregion *mr)
{
unsigned long flags;
u32 lkey = mr->lkey;
u32 r;
struct qib_ibdev *dev = to_idev(mr->pd->device);
- struct qib_lkey_table *rkt = &dev->lk_table;
+ struct rvt_lkey_table *rkt = &dev->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
if (!mr->lkey_published)
@@ -128,7 +128,7 @@ void qib_free_lkey(struct qib_mregion *mr)
if (lkey == 0)
RCU_INIT_POINTER(dev->dma_mr, NULL);
else {
- r = lkey >> (32 - ib_qib_lkey_table_size);
+ r = lkey >> (32 - ib_rvt_lkey_table_size);
RCU_INIT_POINTER(rkt->table[r], NULL);
}
qib_put_mr(mr);
@@ -138,105 +138,6 @@ out:
}
/**
- * qib_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
- struct qib_sge *isge, struct ib_sge *sge, int acc)
-{
- struct qib_mregion *mr;
- unsigned n, m;
- size_t off;
-
- /*
- * We use LKEY == zero for kernel virtual addresses
- * (see qib_get_dma_mr and qib_dma.c).
- */
- rcu_read_lock();
- if (sge->lkey == 0) {
- struct qib_ibdev *dev = to_idev(pd->ibpd.device);
-
- if (pd->user)
- goto bail;
- mr = rcu_dereference(dev->dma_mr);
- if (!mr)
- goto bail;
- if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
- goto bail;
- rcu_read_unlock();
-
- isge->mr = mr;
- isge->vaddr = (void *) sge->addr;
- isge->length = sge->length;
- isge->sge_length = sge->length;
- isge->m = 0;
- isge->n = 0;
- goto ok;
- }
- mr = rcu_dereference(
- rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
- if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
- goto bail;
-
- off = sge->addr - mr->user_base;
- if (unlikely(sge->addr < mr->user_base ||
- off + sge->length > mr->length ||
- (mr->access_flags & acc) != acc))
- goto bail;
- if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
- goto bail;
- rcu_read_unlock();
-
- off += mr->offset;
- if (mr->page_shift) {
- /*
- page sizes are uniform power of 2 so no loop is necessary
- entries_spanned_by_off is the number of times the loop below
- would have executed.
- */
- size_t entries_spanned_by_off;
-
- entries_spanned_by_off = off >> mr->page_shift;
- off -= (entries_spanned_by_off << mr->page_shift);
- m = entries_spanned_by_off/QIB_SEGSZ;
- n = entries_spanned_by_off%QIB_SEGSZ;
- } else {
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
- }
- isge->mr = mr;
- isge->vaddr = mr->map[m]->segs[n].vaddr + off;
- isge->length = mr->map[m]->segs[n].length - off;
- isge->sge_length = sge->length;
- isge->m = m;
- isge->n = n;
-ok:
- return 1;
-bail:
- rcu_read_unlock();
- return 0;
-}
-
-/**
* qib_rkey_ok - check the IB virtual address, length, and RKEY
* @qp: qp for validation
* @sge: SGE state
@@ -249,11 +150,11 @@ bail:
*
* increments the reference count upon success
*/
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc)
{
- struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
- struct qib_mregion *mr;
+ struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+ struct rvt_mregion *mr;
unsigned n, m;
size_t off;
@@ -263,7 +164,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
*/
rcu_read_lock();
if (rkey == 0) {
- struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+ struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
struct qib_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
@@ -285,7 +186,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
}
mr = rcu_dereference(
- rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+ rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]);
if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
@@ -308,15 +209,15 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
entries_spanned_by_off = off >> mr->page_shift;
off -= (entries_spanned_by_off << mr->page_shift);
- m = entries_spanned_by_off/QIB_SEGSZ;
- n = entries_spanned_by_off%QIB_SEGSZ;
+ m = entries_spanned_by_off / RVT_SEGSZ;
+ n = entries_spanned_by_off % RVT_SEGSZ;
} else {
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
- if (n >= QIB_SEGSZ) {
+ if (n >= RVT_SEGSZ) {
m++;
n = 0;
}
@@ -335,58 +236,3 @@ bail:
return 0;
}
-/*
- * Initialize the memory region specified by the work request.
- */
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
-{
- struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
- struct qib_pd *pd = to_ipd(qp->ibqp.pd);
- struct qib_mr *mr = to_imr(wr->mr);
- struct qib_mregion *mrg;
- u32 key = wr->key;
- unsigned i, n, m;
- int ret = -EINVAL;
- unsigned long flags;
- u64 *page_list;
- size_t ps;
-
- spin_lock_irqsave(&rkt->lock, flags);
- if (pd->user || key == 0)
- goto bail;
-
- mrg = rcu_dereference_protected(
- rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
- lockdep_is_held(&rkt->lock));
- if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
- goto bail;
-
- if (mr->npages > mrg->max_segs)
- goto bail;
-
- ps = mr->ibmr.page_size;
- if (mr->ibmr.length > ps * mr->npages)
- goto bail;
-
- mrg->user_base = mr->ibmr.iova;
- mrg->iova = mr->ibmr.iova;
- mrg->lkey = key;
- mrg->length = mr->ibmr.length;
- mrg->access_flags = wr->access;
- page_list = mr->pages;
- m = 0;
- n = 0;
- for (i = 0; i < mr->npages; i++) {
- mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
- mrg->map[m]->segs[n].length = ps;
- if (++n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = 0;
-bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
- return ret;
-}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 9625e7c438e5..0bd18375d7df 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -70,7 +70,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
unsigned long flags;
unsigned long timeout;
- agent = ibp->send_agent;
+ agent = ibp->rvp.send_agent;
if (!agent)
return;
@@ -79,7 +79,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
return;
/* o14-2 */
- if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+ if (ibp->rvp.trap_timeout &&
+ time_before(jiffies, ibp->rvp.trap_timeout))
return;
send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR,
@@ -93,42 +94,42 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
smp->class_version = 1;
smp->method = IB_MGMT_METHOD_TRAP;
- ibp->tid++;
- smp->tid = cpu_to_be64(ibp->tid);
+ ibp->rvp.tid++;
+ smp->tid = cpu_to_be64(ibp->rvp.tid);
smp->attr_id = IB_SMP_ATTR_NOTICE;
/* o14-1: smp->mkey = 0; */
memcpy(smp->data, data, len);
- spin_lock_irqsave(&ibp->lock, flags);
- if (!ibp->sm_ah) {
- if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ if (!ibp->rvp.sm_ah) {
+ if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
struct ib_ah *ah;
- ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
+ ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid);
if (IS_ERR(ah))
ret = PTR_ERR(ah);
else {
send_buf->ah = ah;
- ibp->sm_ah = to_iah(ah);
+ ibp->rvp.sm_ah = ibah_to_rvtah(ah);
ret = 0;
}
} else
ret = -EINVAL;
} else {
- send_buf->ah = &ibp->sm_ah->ibah;
+ send_buf->ah = &ibp->rvp.sm_ah->ibah;
ret = 0;
}
- spin_unlock_irqrestore(&ibp->lock, flags);
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (!ret)
ret = ib_post_send_mad(send_buf, NULL);
if (!ret) {
/* 4.096 usec. */
- timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
- ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+ timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+ ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
} else {
ib_free_send_mad(send_buf);
- ibp->trap_timeout = 0;
+ ibp->rvp.trap_timeout = 0;
}
}
@@ -141,10 +142,10 @@ void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
struct ib_mad_notice_attr data;
if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
- ibp->pkey_violations++;
+ ibp->rvp.pkey_violations++;
else
- ibp->qkey_violations++;
- ibp->n_pkt_drops++;
+ ibp->rvp.qkey_violations++;
+ ibp->rvp.n_pkt_drops++;
/* Send violation trap */
data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -205,8 +206,11 @@ static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp)
/*
* Send a Port Capability Mask Changed trap (ch. 14.3.11).
*/
-void qib_cap_mask_chg(struct qib_ibport *ibp)
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
{
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = dd_from_dev(ibdev);
+ struct qib_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
struct ib_mad_notice_attr data;
data.generic_type = IB_NOTICE_TYPE_INFO;
@@ -217,8 +221,8 @@ void qib_cap_mask_chg(struct qib_ibport *ibp)
data.toggle_count = 0;
memset(&data.details, 0, sizeof(data.details));
data.details.ntc_144.lid = data.issuer_lid;
- data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-
+ data.details.ntc_144.new_cap_mask =
+ cpu_to_be32(ibp->rvp.port_cap_flags);
qib_send_trap(ibp, &data, sizeof(data));
}
@@ -409,37 +413,38 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
int ret = 0;
/* Is the mkey in the process of expiring? */
- if (ibp->mkey_lease_timeout &&
- time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+ if (ibp->rvp.mkey_lease_timeout &&
+ time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
/* Clear timeout and mkey protection field. */
- ibp->mkey_lease_timeout = 0;
- ibp->mkeyprot = 0;
+ ibp->rvp.mkey_lease_timeout = 0;
+ ibp->rvp.mkeyprot = 0;
}
- if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 ||
- ibp->mkey == smp->mkey)
+ if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 ||
+ ibp->rvp.mkey == smp->mkey)
valid_mkey = 1;
/* Unset lease timeout on any valid Get/Set/TrapRepress */
- if (valid_mkey && ibp->mkey_lease_timeout &&
+ if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
(smp->method == IB_MGMT_METHOD_GET ||
smp->method == IB_MGMT_METHOD_SET ||
smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
- ibp->mkey_lease_timeout = 0;
+ ibp->rvp.mkey_lease_timeout = 0;
if (!valid_mkey) {
switch (smp->method) {
case IB_MGMT_METHOD_GET:
/* Bad mkey not a violation below level 2 */
- if (ibp->mkeyprot < 2)
+ if (ibp->rvp.mkeyprot < 2)
break;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
- if (ibp->mkey_violations != 0xFFFF)
- ++ibp->mkey_violations;
- if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
- ibp->mkey_lease_timeout = jiffies +
- ibp->mkey_lease_period * HZ;
+ if (ibp->rvp.mkey_violations != 0xFFFF)
+ ++ibp->rvp.mkey_violations;
+ if (!ibp->rvp.mkey_lease_timeout &&
+ ibp->rvp.mkey_lease_period)
+ ibp->rvp.mkey_lease_timeout = jiffies +
+ ibp->rvp.mkey_lease_period * HZ;
/* Generate a trap notice. */
qib_bad_mkey(ibp, smp);
ret = 1;
@@ -489,15 +494,15 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
/* Only return the mkey if the protection field allows it. */
if (!(smp->method == IB_MGMT_METHOD_GET &&
- ibp->mkey != smp->mkey &&
- ibp->mkeyprot == 1))
- pip->mkey = ibp->mkey;
- pip->gid_prefix = ibp->gid_prefix;
+ ibp->rvp.mkey != smp->mkey &&
+ ibp->rvp.mkeyprot == 1))
+ pip->mkey = ibp->rvp.mkey;
+ pip->gid_prefix = ibp->rvp.gid_prefix;
pip->lid = cpu_to_be16(ppd->lid);
- pip->sm_lid = cpu_to_be16(ibp->sm_lid);
- pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
+ pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid);
+ pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
/* pip->diag_code; */
- pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+ pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
pip->local_port_num = port;
pip->link_width_enabled = ppd->link_width_enabled;
pip->link_width_supported = ppd->link_width_supported;
@@ -508,7 +513,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
pip->portphysstate_linkdown =
(dd->f_ibphys_portstate(ppd->lastibcstat) << 4) |
(get_linkdowndefaultstate(ppd) ? 1 : 2);
- pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+ pip->mkeyprot_resv_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) |
ppd->link_speed_enabled;
switch (ppd->ibmtu) {
@@ -529,9 +534,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
mtu = IB_MTU_256;
break;
}
- pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl;
+ pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->rvp.sm_sl;
pip->vlcap_inittype = ppd->vls_supported << 4; /* InitType = 0 */
- pip->vl_high_limit = ibp->vl_high_limit;
+ pip->vl_high_limit = ibp->rvp.vl_high_limit;
pip->vl_arb_high_cap =
dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP);
pip->vl_arb_low_cap =
@@ -542,20 +547,20 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
/* pip->vlstallcnt_hoqlife; */
pip->operationalvl_pei_peo_fpi_fpo =
dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4;
- pip->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+ pip->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
/* P_KeyViolations are counted by hardware. */
- pip->pkey_violations = cpu_to_be16(ibp->pkey_violations);
- pip->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+ pip->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+ pip->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
/* Only the hardware GUID is supported for now */
pip->guid_cap = QIB_GUIDS_PER_PORT;
- pip->clientrereg_resv_subnetto = ibp->subnet_timeout;
+ pip->clientrereg_resv_subnetto = ibp->rvp.subnet_timeout;
/* 32.768 usec. response time (guessing) */
pip->resv_resptimevalue = 3;
pip->localphyerrors_overrunerrors =
(get_phyerrthreshold(ppd) << 4) |
get_overrunthreshold(ppd);
/* pip->max_credit_hint; */
- if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+ if (ibp->rvp.port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
u32 v;
v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY);
@@ -685,13 +690,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
event.device = ibdev;
event.element.port_num = port;
- ibp->mkey = pip->mkey;
- ibp->gid_prefix = pip->gid_prefix;
- ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
+ ibp->rvp.mkey = pip->mkey;
+ ibp->rvp.gid_prefix = pip->gid_prefix;
+ ibp->rvp.mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
lid = be16_to_cpu(pip->lid);
/* Must be a valid unicast LID address. */
- if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
+ if (lid == 0 || lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
smp->status |= IB_SMP_INVALID_FIELD;
else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
if (ppd->lid != lid)
@@ -706,21 +711,21 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
smlid = be16_to_cpu(pip->sm_lid);
msl = pip->neighbormtu_mastersmsl & 0xF;
/* Must be a valid unicast LID address. */
- if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
+ if (smlid == 0 || smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
smp->status |= IB_SMP_INVALID_FIELD;
- else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
- spin_lock_irqsave(&ibp->lock, flags);
- if (ibp->sm_ah) {
- if (smlid != ibp->sm_lid)
- ibp->sm_ah->attr.dlid = smlid;
- if (msl != ibp->sm_sl)
- ibp->sm_ah->attr.sl = msl;
+ else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ if (ibp->rvp.sm_ah) {
+ if (smlid != ibp->rvp.sm_lid)
+ ibp->rvp.sm_ah->attr.dlid = smlid;
+ if (msl != ibp->rvp.sm_sl)
+ ibp->rvp.sm_ah->attr.sl = msl;
}
- spin_unlock_irqrestore(&ibp->lock, flags);
- if (smlid != ibp->sm_lid)
- ibp->sm_lid = smlid;
- if (msl != ibp->sm_sl)
- ibp->sm_sl = msl;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ if (smlid != ibp->rvp.sm_lid)
+ ibp->rvp.sm_lid = smlid;
+ if (msl != ibp->rvp.sm_sl)
+ ibp->rvp.sm_sl = msl;
event.event = IB_EVENT_SM_CHANGE;
ib_dispatch_event(&event);
}
@@ -768,10 +773,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
smp->status |= IB_SMP_INVALID_FIELD;
}
- ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
- ibp->vl_high_limit = pip->vl_high_limit;
+ ibp->rvp.mkeyprot = pip->mkeyprot_resv_lmc >> 6;
+ ibp->rvp.vl_high_limit = pip->vl_high_limit;
(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT,
- ibp->vl_high_limit);
+ ibp->rvp.vl_high_limit);
mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
if (mtu == -1)
@@ -789,13 +794,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
}
if (pip->mkey_violations == 0)
- ibp->mkey_violations = 0;
+ ibp->rvp.mkey_violations = 0;
if (pip->pkey_violations == 0)
- ibp->pkey_violations = 0;
+ ibp->rvp.pkey_violations = 0;
if (pip->qkey_violations == 0)
- ibp->qkey_violations = 0;
+ ibp->rvp.qkey_violations = 0;
ore = pip->localphyerrors_overrunerrors;
if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
@@ -804,7 +809,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
if (set_overrunthreshold(ppd, (ore & 0xF)))
smp->status |= IB_SMP_INVALID_FIELD;
- ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
+ ibp->rvp.subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
/*
* Do the port state change now that the other link parameters
@@ -1028,7 +1033,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
event.event = IB_EVENT_PKEY_CHANGE;
- event.device = &dd->verbs_dev.ibdev;
+ event.device = &dd->verbs_dev.rdi.ibdev;
event.element.port_num = port;
ib_dispatch_event(&event);
}
@@ -1062,7 +1067,7 @@ static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
memset(smp->data, 0, sizeof(smp->data));
- if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP))
+ if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP))
smp->status |= IB_SMP_UNSUP_METHOD;
else
for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2)
@@ -1078,7 +1083,7 @@ static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
u8 *p = (u8 *) smp->data;
unsigned i;
- if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) {
+ if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP)) {
smp->status |= IB_SMP_UNSUP_METHOD;
return reply(smp);
}
@@ -1195,20 +1200,20 @@ static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
goto bail;
}
- spin_lock_irqsave(&ibp->lock, flags);
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS);
p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
p->counter_width = 4; /* 32 bit counters */
p->counter_mask0_9 = COUNTER_MASK0_9;
- p->sample_start = cpu_to_be32(ibp->pma_sample_start);
- p->sample_interval = cpu_to_be32(ibp->pma_sample_interval);
- p->tag = cpu_to_be16(ibp->pma_tag);
- p->counter_select[0] = ibp->pma_counter_select[0];
- p->counter_select[1] = ibp->pma_counter_select[1];
- p->counter_select[2] = ibp->pma_counter_select[2];
- p->counter_select[3] = ibp->pma_counter_select[3];
- p->counter_select[4] = ibp->pma_counter_select[4];
- spin_unlock_irqrestore(&ibp->lock, flags);
+ p->sample_start = cpu_to_be32(ibp->rvp.pma_sample_start);
+ p->sample_interval = cpu_to_be32(ibp->rvp.pma_sample_interval);
+ p->tag = cpu_to_be16(ibp->rvp.pma_tag);
+ p->counter_select[0] = ibp->rvp.pma_counter_select[0];
+ p->counter_select[1] = ibp->rvp.pma_counter_select[1];
+ p->counter_select[2] = ibp->rvp.pma_counter_select[2];
+ p->counter_select[3] = ibp->rvp.pma_counter_select[3];
+ p->counter_select[4] = ibp->rvp.pma_counter_select[4];
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
bail:
return reply((struct ib_smp *) pmp);
@@ -1233,7 +1238,7 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
goto bail;
}
- spin_lock_irqsave(&ibp->lock, flags);
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
/* Port Sampling code owns the PS* HW counters */
xmit_flags = ppd->cong_stats.flags;
@@ -1242,18 +1247,18 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
if (status == IB_PMA_SAMPLE_STATUS_DONE ||
(status == IB_PMA_SAMPLE_STATUS_RUNNING &&
xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) {
- ibp->pma_sample_start = be32_to_cpu(p->sample_start);
- ibp->pma_sample_interval = be32_to_cpu(p->sample_interval);
- ibp->pma_tag = be16_to_cpu(p->tag);
- ibp->pma_counter_select[0] = p->counter_select[0];
- ibp->pma_counter_select[1] = p->counter_select[1];
- ibp->pma_counter_select[2] = p->counter_select[2];
- ibp->pma_counter_select[3] = p->counter_select[3];
- ibp->pma_counter_select[4] = p->counter_select[4];
- dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval,
- ibp->pma_sample_start);
+ ibp->rvp.pma_sample_start = be32_to_cpu(p->sample_start);
+ ibp->rvp.pma_sample_interval = be32_to_cpu(p->sample_interval);
+ ibp->rvp.pma_tag = be16_to_cpu(p->tag);
+ ibp->rvp.pma_counter_select[0] = p->counter_select[0];
+ ibp->rvp.pma_counter_select[1] = p->counter_select[1];
+ ibp->rvp.pma_counter_select[2] = p->counter_select[2];
+ ibp->rvp.pma_counter_select[3] = p->counter_select[3];
+ ibp->rvp.pma_counter_select[4] = p->counter_select[4];
+ dd->f_set_cntr_sample(ppd, ibp->rvp.pma_sample_interval,
+ ibp->rvp.pma_sample_start);
}
- spin_unlock_irqrestore(&ibp->lock, flags);
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
ret = pma_get_portsamplescontrol(pmp, ibdev, port);
@@ -1357,8 +1362,8 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
int i;
memset(pmp->data, 0, sizeof(pmp->data));
- spin_lock_irqsave(&ibp->lock, flags);
- p->tag = cpu_to_be16(ibp->pma_tag);
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ p->tag = cpu_to_be16(ibp->rvp.pma_tag);
if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
else {
@@ -1373,11 +1378,11 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
}
}
- for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+ for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
p->counter[i] = cpu_to_be32(
get_cache_hw_sample_counters(
- ppd, ibp->pma_counter_select[i]));
- spin_unlock_irqrestore(&ibp->lock, flags);
+ ppd, ibp->rvp.pma_counter_select[i]));
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
return reply((struct ib_smp *) pmp);
}
@@ -1397,8 +1402,8 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
/* Port Sampling code owns the PS* HW counters */
memset(pmp->data, 0, sizeof(pmp->data));
- spin_lock_irqsave(&ibp->lock, flags);
- p->tag = cpu_to_be16(ibp->pma_tag);
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ p->tag = cpu_to_be16(ibp->rvp.pma_tag);
if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
else {
@@ -1415,11 +1420,11 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
}
}
- for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+ for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
p->counter[i] = cpu_to_be64(
get_cache_hw_sample_counters(
- ppd, ibp->pma_counter_select[i]));
- spin_unlock_irqrestore(&ibp->lock, flags);
+ ppd, ibp->rvp.pma_counter_select[i]));
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
return reply((struct ib_smp *) pmp);
}
@@ -1453,7 +1458,7 @@ static int pma_get_portcounters(struct ib_pma_mad *pmp,
cntrs.excessive_buffer_overrun_errors -=
ibp->z_excessive_buffer_overrun_errors;
cntrs.vl15_dropped -= ibp->z_vl15_dropped;
- cntrs.vl15_dropped += ibp->n_vl15_dropped;
+ cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
memset(pmp->data, 0, sizeof(pmp->data));
@@ -1546,9 +1551,9 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
qib_get_counters(ppd, &cntrs);
- spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+ spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
xmit_wait_counter = xmit_wait_get_value_delta(ppd);
- spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+ spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
/* Adjust counters for any resets done. */
cntrs.symbol_error_counter -= ibp->z_symbol_error_counter;
@@ -1564,7 +1569,7 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
cntrs.excessive_buffer_overrun_errors -=
ibp->z_excessive_buffer_overrun_errors;
cntrs.vl15_dropped -= ibp->z_vl15_dropped;
- cntrs.vl15_dropped += ibp->n_vl15_dropped;
+ cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
cntrs.port_xmit_data -= ibp->z_port_xmit_data;
cntrs.port_rcv_data -= ibp->z_port_rcv_data;
cntrs.port_xmit_packets -= ibp->z_port_xmit_packets;
@@ -1743,7 +1748,7 @@ static int pma_set_portcounters(struct ib_pma_mad *pmp,
cntrs.excessive_buffer_overrun_errors;
if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
- ibp->n_vl15_dropped = 0;
+ ibp->rvp.n_vl15_dropped = 0;
ibp->z_vl15_dropped = cntrs.vl15_dropped;
}
@@ -1778,11 +1783,11 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
ret = pma_get_portcounters_cong(pmp, ibdev, port);
if (counter_select & IB_PMA_SEL_CONG_XMIT) {
- spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+ spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
ppd->cong_stats.counter = 0;
dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL,
0x0);
- spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+ spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
}
if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) {
ibp->z_port_xmit_data = cntrs.port_xmit_data;
@@ -1806,7 +1811,7 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
cntrs.local_link_integrity_errors;
ibp->z_excessive_buffer_overrun_errors =
cntrs.excessive_buffer_overrun_errors;
- ibp->n_vl15_dropped = 0;
+ ibp->rvp.n_vl15_dropped = 0;
ibp->z_vl15_dropped = cntrs.vl15_dropped;
}
@@ -1916,12 +1921,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
ret = subn_get_vl_arb(smp, ibdev, port);
goto bail;
case IB_SMP_ATTR_SM_INFO:
- if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
ret = IB_MAD_RESULT_SUCCESS |
IB_MAD_RESULT_CONSUMED;
goto bail;
}
- if (ibp->port_cap_flags & IB_PORT_SM) {
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
ret = IB_MAD_RESULT_SUCCESS;
goto bail;
}
@@ -1950,12 +1955,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
ret = subn_set_vl_arb(smp, ibdev, port);
goto bail;
case IB_SMP_ATTR_SM_INFO:
- if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
ret = IB_MAD_RESULT_SUCCESS |
IB_MAD_RESULT_CONSUMED;
goto bail;
}
- if (ibp->port_cap_flags & IB_PORT_SM) {
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
ret = IB_MAD_RESULT_SUCCESS;
goto bail;
}
@@ -2443,12 +2448,6 @@ bail:
return ret;
}
-static void send_handler(struct ib_mad_agent *agent,
- struct ib_mad_send_wc *mad_send_wc)
-{
- ib_free_send_mad(mad_send_wc->send_buf);
-}
-
static void xmit_wait_timer_func(unsigned long opaque)
{
struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
@@ -2456,7 +2455,7 @@ static void xmit_wait_timer_func(unsigned long opaque)
unsigned long flags;
u8 status;
- spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+ spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) {
status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
if (status == IB_PMA_SAMPLE_STATUS_DONE) {
@@ -2469,74 +2468,35 @@ static void xmit_wait_timer_func(unsigned long opaque)
ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd);
dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0);
done:
- spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+ spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
mod_timer(&ppd->cong_stats.timer, jiffies + HZ);
}
-int qib_create_agents(struct qib_ibdev *dev)
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx)
{
- struct qib_devdata *dd = dd_from_dev(dev);
- struct ib_mad_agent *agent;
- struct qib_ibport *ibp;
- int p;
- int ret;
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(ibdev,
+ struct qib_devdata, verbs_dev);
- for (p = 0; p < dd->num_pports; p++) {
- ibp = &dd->pport[p].ibport_data;
- agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
- NULL, 0, send_handler,
- NULL, NULL, 0);
- if (IS_ERR(agent)) {
- ret = PTR_ERR(agent);
- goto err;
- }
-
- /* Initialize xmit_wait structure */
- dd->pport[p].cong_stats.counter = 0;
- init_timer(&dd->pport[p].cong_stats.timer);
- dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func;
- dd->pport[p].cong_stats.timer.data =
- (unsigned long)(&dd->pport[p]);
- dd->pport[p].cong_stats.timer.expires = 0;
- add_timer(&dd->pport[p].cong_stats.timer);
-
- ibp->send_agent = agent;
- }
-
- return 0;
-
-err:
- for (p = 0; p < dd->num_pports; p++) {
- ibp = &dd->pport[p].ibport_data;
- if (ibp->send_agent) {
- agent = ibp->send_agent;
- ibp->send_agent = NULL;
- ib_unregister_mad_agent(agent);
- }
- }
-
- return ret;
+ /* Initialize xmit_wait structure */
+ dd->pport[port_idx].cong_stats.counter = 0;
+ init_timer(&dd->pport[port_idx].cong_stats.timer);
+ dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func;
+ dd->pport[port_idx].cong_stats.timer.data =
+ (unsigned long)(&dd->pport[port_idx]);
+ dd->pport[port_idx].cong_stats.timer.expires = 0;
+ add_timer(&dd->pport[port_idx].cong_stats.timer);
}
-void qib_free_agents(struct qib_ibdev *dev)
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
{
- struct qib_devdata *dd = dd_from_dev(dev);
- struct ib_mad_agent *agent;
- struct qib_ibport *ibp;
- int p;
-
- for (p = 0; p < dd->num_pports; p++) {
- ibp = &dd->pport[p].ibport_data;
- if (ibp->send_agent) {
- agent = ibp->send_agent;
- ibp->send_agent = NULL;
- ib_unregister_mad_agent(agent);
- }
- if (ibp->sm_ah) {
- ib_destroy_ah(&ibp->sm_ah->ibah);
- ibp->sm_ah = NULL;
- }
- if (dd->pport[p].cong_stats.timer.data)
- del_timer_sync(&dd->pport[p].cong_stats.timer);
- }
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(ibdev,
+ struct qib_devdata, verbs_dev);
+
+ if (dd->pport[port_idx].cong_stats.timer.data)
+ del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
+
+ if (dd->pport[port_idx].ibport_data.smi_ah)
+ ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
}
diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c
deleted file mode 100644
index 34927b700b0e..000000000000
--- a/drivers/infiniband/hw/qib/qib_mmap.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct qib_mmap_info
- */
-void qib_release_mmap_info(struct kref *ref)
-{
- struct qib_mmap_info *ip =
- container_of(ref, struct qib_mmap_info, ref);
- struct qib_ibdev *dev = to_idev(ip->context->device);
-
- spin_lock_irq(&dev->pending_lock);
- list_del(&ip->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
-
- vfree(ip->obj);
- kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void qib_vma_open(struct vm_area_struct *vma)
-{
- struct qib_mmap_info *ip = vma->vm_private_data;
-
- kref_get(&ip->ref);
-}
-
-static void qib_vma_close(struct vm_area_struct *vma)
-{
- struct qib_mmap_info *ip = vma->vm_private_data;
-
- kref_put(&ip->ref, qib_release_mmap_info);
-}
-
-static const struct vm_operations_struct qib_vm_ops = {
- .open = qib_vma_open,
- .close = qib_vma_close,
-};
-
-/**
- * qib_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- struct qib_ibdev *dev = to_idev(context->device);
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
- unsigned long size = vma->vm_end - vma->vm_start;
- struct qib_mmap_info *ip, *pp;
- int ret = -EINVAL;
-
- /*
- * Search the device's list of objects waiting for a mmap call.
- * Normally, this list is very short since a call to create a
- * CQ, QP, or SRQ is soon followed by a call to mmap().
- */
- spin_lock_irq(&dev->pending_lock);
- list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
- pending_mmaps) {
- /* Only the creator is allowed to mmap the object */
- if (context != ip->context || (__u64) offset != ip->offset)
- continue;
- /* Don't allow a mmap larger than the object. */
- if (size > ip->size)
- break;
-
- list_del_init(&ip->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
-
- ret = remap_vmalloc_range(vma, ip->obj, 0);
- if (ret)
- goto done;
- vma->vm_ops = &qib_vm_ops;
- vma->vm_private_data = ip;
- qib_vma_open(vma);
- goto done;
- }
- spin_unlock_irq(&dev->pending_lock);
-done:
- return ret;
-}
-
-/*
- * Allocate information for qib_mmap
- */
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev,
- u32 size,
- struct ib_ucontext *context,
- void *obj) {
- struct qib_mmap_info *ip;
-
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip)
- goto bail;
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irq(&dev->mmap_offset_lock);
- if (dev->mmap_offset == 0)
- dev->mmap_offset = PAGE_SIZE;
- ip->offset = dev->mmap_offset;
- dev->mmap_offset += size;
- spin_unlock_irq(&dev->mmap_offset_lock);
-
- INIT_LIST_HEAD(&ip->pending_mmaps);
- ip->size = size;
- ip->context = context;
- ip->obj = obj;
- kref_init(&ip->ref);
-
-bail:
- return ip;
-}
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
- u32 size, void *obj)
-{
- size = PAGE_ALIGN(size);
-
- spin_lock_irq(&dev->mmap_offset_lock);
- if (dev->mmap_offset == 0)
- dev->mmap_offset = PAGE_SIZE;
- ip->offset = dev->mmap_offset;
- dev->mmap_offset += size;
- spin_unlock_irq(&dev->mmap_offset_lock);
-
- ip->size = size;
- ip->obj = obj;
-}
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
deleted file mode 100644
index 5f53304e8a9b..000000000000
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ /dev/null
@@ -1,490 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "qib.h"
-
-/* Fast memory region */
-struct qib_fmr {
- struct ib_fmr ibfmr;
- struct qib_mregion mr; /* must be last */
-};
-
-static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct qib_fmr, ibfmr);
-}
-
-static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
- int count)
-{
- int m, i = 0;
- int rval = 0;
-
- m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
- for (; i < m; i++) {
- mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
- if (!mr->map[i])
- goto bail;
- }
- mr->mapsz = m;
- init_completion(&mr->comp);
- /* count returning the ptr to user */
- atomic_set(&mr->refcount, 1);
- mr->pd = pd;
- mr->max_segs = count;
-out:
- return rval;
-bail:
- while (i)
- kfree(mr->map[--i]);
- rval = -ENOMEM;
- goto out;
-}
-
-static void deinit_qib_mregion(struct qib_mregion *mr)
-{
- int i = mr->mapsz;
-
- mr->mapsz = 0;
- while (i)
- kfree(mr->map[--i]);
-}
-
-
-/**
- * qib_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see qib_dma.c).
- */
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct qib_mr *mr = NULL;
- struct ib_mr *ret;
- int rval;
-
- if (to_ipd(pd)->user) {
- ret = ERR_PTR(-EPERM);
- goto bail;
- }
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- rval = init_qib_mregion(&mr->mr, pd, 0);
- if (rval) {
- ret = ERR_PTR(rval);
- goto bail;
- }
-
-
- rval = qib_alloc_lkey(&mr->mr, 1);
- if (rval) {
- ret = ERR_PTR(rval);
- goto bail_mregion;
- }
-
- mr->mr.access_flags = acc;
- ret = &mr->ibmr;
-done:
- return ret;
-
-bail_mregion:
- deinit_qib_mregion(&mr->mr);
-bail:
- kfree(mr);
- goto done;
-}
-
-static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
-{
- struct qib_mr *mr;
- int rval = -ENOMEM;
- int m;
-
- /* Allocate struct plus pointers to first level page tables. */
- m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
- mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
- if (!mr)
- goto bail;
-
- rval = init_qib_mregion(&mr->mr, pd, count);
- if (rval)
- goto bail;
-
- rval = qib_alloc_lkey(&mr->mr, 0);
- if (rval)
- goto bail_mregion;
- mr->ibmr.lkey = mr->mr.lkey;
- mr->ibmr.rkey = mr->mr.lkey;
-done:
- return mr;
-
-bail_mregion:
- deinit_qib_mregion(&mr->mr);
-bail:
- kfree(mr);
- mr = ERR_PTR(rval);
- goto done;
-}
-
-/**
- * qib_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the QLogic_IB driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_udata *udata)
-{
- struct qib_mr *mr;
- struct ib_umem *umem;
- struct scatterlist *sg;
- int n, m, entry;
- struct ib_mr *ret;
-
- if (length == 0) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
-
- umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags, 0);
- if (IS_ERR(umem))
- return (void *) umem;
-
- n = umem->nmap;
-
- mr = alloc_mr(n, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- ib_umem_release(umem);
- goto bail;
- }
-
- mr->mr.user_base = start;
- mr->mr.iova = virt_addr;
- mr->mr.length = length;
- mr->mr.offset = ib_umem_offset(umem);
- mr->mr.access_flags = mr_access_flags;
- mr->umem = umem;
-
- if (is_power_of_2(umem->page_size))
- mr->mr.page_shift = ilog2(umem->page_size);
- m = 0;
- n = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- void *vaddr;
-
- vaddr = page_address(sg_page(sg));
- if (!vaddr) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- n++;
- if (n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
- * qib_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by qib_get_dma_mr()
- * or qib_reg_user_mr().
- */
-int qib_dereg_mr(struct ib_mr *ibmr)
-{
- struct qib_mr *mr = to_imr(ibmr);
- int ret = 0;
- unsigned long timeout;
-
- kfree(mr->pages);
- qib_free_lkey(&mr->mr);
-
- qib_put_mr(&mr->mr); /* will set completion if last */
- timeout = wait_for_completion_timeout(&mr->mr.comp,
- 5 * HZ);
- if (!timeout) {
- qib_get_mr(&mr->mr);
- ret = -EBUSY;
- goto out;
- }
- deinit_qib_mregion(&mr->mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
- kfree(mr);
-out:
- return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- */
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
-{
- struct qib_mr *mr;
-
- if (mr_type != IB_MR_TYPE_MEM_REG)
- return ERR_PTR(-EINVAL);
-
- mr = alloc_mr(max_num_sg, pd);
- if (IS_ERR(mr))
- return (struct ib_mr *)mr;
-
- mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
- if (!mr->pages)
- goto err;
-
- return &mr->ibmr;
-
-err:
- qib_dereg_mr(&mr->ibmr);
- return ERR_PTR(-ENOMEM);
-}
-
-static int qib_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct qib_mr *mr = to_imr(ibmr);
-
- if (unlikely(mr->npages == mr->mr.max_segs))
- return -ENOMEM;
-
- mr->pages[mr->npages++] = addr;
-
- return 0;
-}
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
-{
- struct qib_mr *mr = to_imr(ibmr);
-
- mr->npages = 0;
-
- return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
-}
-
-/**
- * qib_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
-{
- struct qib_fmr *fmr;
- int m;
- struct ib_fmr *ret;
- int rval = -ENOMEM;
-
- /* Allocate struct plus pointers to first level page tables. */
- m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
- fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
- if (!fmr)
- goto bail;
-
- rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
- if (rval)
- goto bail;
-
- /*
- * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
- * rkey.
- */
- rval = qib_alloc_lkey(&fmr->mr, 0);
- if (rval)
- goto bail_mregion;
- fmr->ibfmr.rkey = fmr->mr.lkey;
- fmr->ibfmr.lkey = fmr->mr.lkey;
- /*
- * Resources are allocated but no valid mapping (RKEY can't be
- * used).
- */
- fmr->mr.access_flags = mr_access_flags;
- fmr->mr.max_segs = fmr_attr->max_pages;
- fmr->mr.page_shift = fmr_attr->page_shift;
-
- ret = &fmr->ibfmr;
-done:
- return ret;
-
-bail_mregion:
- deinit_qib_mregion(&fmr->mr);
-bail:
- kfree(fmr);
- ret = ERR_PTR(rval);
- goto done;
-}
-
-/**
- * qib_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova)
-{
- struct qib_fmr *fmr = to_ifmr(ibfmr);
- struct qib_lkey_table *rkt;
- unsigned long flags;
- int m, n, i;
- u32 ps;
- int ret;
-
- i = atomic_read(&fmr->mr.refcount);
- if (i > 2)
- return -EBUSY;
-
- if (list_len > fmr->mr.max_segs) {
- ret = -EINVAL;
- goto bail;
- }
- rkt = &to_idev(ibfmr->device)->lk_table;
- spin_lock_irqsave(&rkt->lock, flags);
- fmr->mr.user_base = iova;
- fmr->mr.iova = iova;
- ps = 1 << fmr->mr.page_shift;
- fmr->mr.length = list_len * ps;
- m = 0;
- n = 0;
- for (i = 0; i < list_len; i++) {
- fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
- fmr->mr.map[m]->segs[n].length = ps;
- if (++n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * qib_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int qib_unmap_fmr(struct list_head *fmr_list)
-{
- struct qib_fmr *fmr;
- struct qib_lkey_table *rkt;
- unsigned long flags;
-
- list_for_each_entry(fmr, fmr_list, ibfmr.list) {
- rkt = &to_idev(fmr->ibfmr.device)->lk_table;
- spin_lock_irqsave(&rkt->lock, flags);
- fmr->mr.user_base = 0;
- fmr->mr.iova = 0;
- fmr->mr.length = 0;
- spin_unlock_irqrestore(&rkt->lock, flags);
- }
- return 0;
-}
-
-/**
- * qib_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int qib_dealloc_fmr(struct ib_fmr *ibfmr)
-{
- struct qib_fmr *fmr = to_ifmr(ibfmr);
- int ret = 0;
- unsigned long timeout;
-
- qib_free_lkey(&fmr->mr);
- qib_put_mr(&fmr->mr); /* will set completion if last */
- timeout = wait_for_completion_timeout(&fmr->mr.comp,
- 5 * HZ);
- if (!timeout) {
- qib_get_mr(&fmr->mr);
- ret = -EBUSY;
- goto out;
- }
- deinit_qib_mregion(&fmr->mr);
- kfree(fmr);
-out:
- return ret;
-}
-
-void mr_rcu_callback(struct rcu_head *list)
-{
- struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
-
- complete(&mr->comp);
-}
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 3eff35c2d453..575b737d9ef3 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -34,32 +34,38 @@
#include <linux/err.h>
#include <linux/vmalloc.h>
-#include <linux/jhash.h>
+#include <rdma/rdma_vt.h>
#ifdef CONFIG_DEBUG_FS
#include <linux/seq_file.h>
#endif
#include "qib.h"
-#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
+/*
+ * mask field which was present in now deleted qib_qpn_table
+ * is not present in rvt_qpn_table. Defining the same field
+ * as qpt_mask here instead of adding the mask field to
+ * rvt_qpn_table.
+ */
+u16 qpt_mask;
-static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
- struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+ struct rvt_qpn_map *map, unsigned off)
{
- return (map - qpt->map) * BITS_PER_PAGE + off;
+ return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}
-static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
- struct qpn_map *map, unsigned off,
+static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
+ struct rvt_qpn_map *map, unsigned off,
unsigned n)
{
- if (qpt->mask) {
+ if (qpt_mask) {
off++;
- if (((off & qpt->mask) >> 1) >= n)
- off = (off | qpt->mask) + 2;
- } else
- off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
+ if (((off & qpt_mask) >> 1) >= n)
+ off = (off | qpt_mask) + 2;
+ } else {
+ off = find_next_zero_bit(map->page, RVT_BITS_PER_PAGE, off);
+ }
return off;
}
@@ -100,7 +106,7 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
gfp_t gfp)
{
unsigned long page = get_zeroed_page(gfp);
@@ -121,12 +127,15 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
* Allocate the next available QPN or
* zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/
-static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- enum ib_qp_type type, u8 port, gfp_t gfp)
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+ enum ib_qp_type type, u8 port, gfp_t gfp)
{
u32 i, offset, max_scan, qpn;
- struct qpn_map *map;
+ struct rvt_qpn_map *map;
u32 ret;
+ struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+ verbs_dev);
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -143,12 +152,12 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
}
qpn = qpt->last + 2;
- if (qpn >= QPN_MAX)
+ if (qpn >= RVT_QPN_MAX)
qpn = 2;
- if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
- qpn = (qpn | qpt->mask) + 2;
- offset = qpn & BITS_PER_PAGE_MASK;
- map = &qpt->map[qpn / BITS_PER_PAGE];
+ if (qpt_mask && ((qpn & qpt_mask) >> 1) >= dd->n_krcv_queues)
+ qpn = (qpn | qpt_mask) + 2;
+ offset = qpn & RVT_BITS_PER_PAGE_MASK;
+ map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
@@ -173,14 +182,14 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
* We just need to be sure we don't loop
* forever.
*/
- } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
+ } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
/*
* In order to keep the number of pages allocated to a
* minimum, we scan the all existing pages before increasing
* the size of the bitmap table.
*/
if (++i > max_scan) {
- if (qpt->nmaps == QPNMAP_ENTRIES)
+ if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
break;
map = &qpt->map[qpt->nmaps++];
offset = 0;
@@ -200,706 +209,113 @@ bail:
return ret;
}
-static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
-{
- struct qpn_map *map;
-
- map = qpt->map + qpn / BITS_PER_PAGE;
- if (map->page)
- clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
-{
- return jhash_1word(qpn, dev->qp_rnd) &
- (dev->qp_table_size - 1);
-}
-
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
- struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- unsigned long flags;
- unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-
- atomic_inc(&qp->refcount);
- spin_lock_irqsave(&dev->qpt_lock, flags);
-
- if (qp->ibqp.qp_num == 0)
- rcu_assign_pointer(ibp->qp0, qp);
- else if (qp->ibqp.qp_num == 1)
- rcu_assign_pointer(ibp->qp1, qp);
- else {
- qp->next = dev->qp_table[n];
- rcu_assign_pointer(dev->qp_table[n], qp);
- }
-
- spin_unlock_irqrestore(&dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
- struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
- unsigned long flags;
- int removed = 1;
-
- spin_lock_irqsave(&dev->qpt_lock, flags);
-
- if (rcu_dereference_protected(ibp->qp0,
- lockdep_is_held(&dev->qpt_lock)) == qp) {
- RCU_INIT_POINTER(ibp->qp0, NULL);
- } else if (rcu_dereference_protected(ibp->qp1,
- lockdep_is_held(&dev->qpt_lock)) == qp) {
- RCU_INIT_POINTER(ibp->qp1, NULL);
- } else {
- struct qib_qp *q;
- struct qib_qp __rcu **qpp;
-
- removed = 0;
- qpp = &dev->qp_table[n];
- for (; (q = rcu_dereference_protected(*qpp,
- lockdep_is_held(&dev->qpt_lock))) != NULL;
- qpp = &q->next)
- if (q == qp) {
- RCU_INIT_POINTER(*qpp,
- rcu_dereference_protected(qp->next,
- lockdep_is_held(&dev->qpt_lock)));
- removed = 1;
- break;
- }
- }
-
- spin_unlock_irqrestore(&dev->qpt_lock, flags);
- if (removed) {
- synchronize_rcu();
- atomic_dec(&qp->refcount);
- }
-}
-
/**
* qib_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
*/
-unsigned qib_free_all_qps(struct qib_devdata *dd)
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi)
{
- struct qib_ibdev *dev = &dd->verbs_dev;
- unsigned long flags;
- struct qib_qp *qp;
+ struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+ verbs_dev);
unsigned n, qp_inuse = 0;
for (n = 0; n < dd->num_pports; n++) {
struct qib_ibport *ibp = &dd->pport[n].ibport_data;
- if (!qib_mcast_tree_empty(ibp))
- qp_inuse++;
rcu_read_lock();
- if (rcu_dereference(ibp->qp0))
+ if (rcu_dereference(ibp->rvp.qp[0]))
qp_inuse++;
- if (rcu_dereference(ibp->qp1))
+ if (rcu_dereference(ibp->rvp.qp[1]))
qp_inuse++;
rcu_read_unlock();
}
-
- spin_lock_irqsave(&dev->qpt_lock, flags);
- for (n = 0; n < dev->qp_table_size; n++) {
- qp = rcu_dereference_protected(dev->qp_table[n],
- lockdep_is_held(&dev->qpt_lock));
- RCU_INIT_POINTER(dev->qp_table[n], NULL);
-
- for (; qp; qp = rcu_dereference_protected(qp->next,
- lockdep_is_held(&dev->qpt_lock)))
- qp_inuse++;
- }
- spin_unlock_irqrestore(&dev->qpt_lock, flags);
- synchronize_rcu();
-
return qp_inuse;
}
-/**
- * qib_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
+void qib_notify_qp_reset(struct rvt_qp *qp)
{
- struct qib_qp *qp = NULL;
-
- rcu_read_lock();
- if (unlikely(qpn <= 1)) {
- if (qpn == 0)
- qp = rcu_dereference(ibp->qp0);
- else
- qp = rcu_dereference(ibp->qp1);
- if (qp)
- atomic_inc(&qp->refcount);
- } else {
- struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
- unsigned n = qpn_hash(dev, qpn);
-
- for (qp = rcu_dereference(dev->qp_table[n]); qp;
- qp = rcu_dereference(qp->next))
- if (qp->ibqp.qp_num == qpn) {
- atomic_inc(&qp->refcount);
- break;
- }
- }
- rcu_read_unlock();
- return qp;
-}
-
-/**
- * qib_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
-{
- qp->remote_qpn = 0;
- qp->qkey = 0;
- qp->qp_access_flags = 0;
- atomic_set(&qp->s_dma_busy, 0);
- qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
- qp->s_hdrwords = 0;
- qp->s_wqe = NULL;
- qp->s_draining = 0;
- qp->s_next_psn = 0;
- qp->s_last_psn = 0;
- qp->s_sending_psn = 0;
- qp->s_sending_hpsn = 0;
- qp->s_psn = 0;
- qp->r_psn = 0;
- qp->r_msn = 0;
- if (type == IB_QPT_RC) {
- qp->s_state = IB_OPCODE_RC_SEND_LAST;
- qp->r_state = IB_OPCODE_RC_SEND_LAST;
- } else {
- qp->s_state = IB_OPCODE_UC_SEND_LAST;
- qp->r_state = IB_OPCODE_UC_SEND_LAST;
- }
- qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- qp->r_nak_state = 0;
- qp->r_aflags = 0;
- qp->r_flags = 0;
- qp->s_head = 0;
- qp->s_tail = 0;
- qp->s_cur = 0;
- qp->s_acked = 0;
- qp->s_last = 0;
- qp->s_ssn = 1;
- qp->s_lsn = 0;
- qp->s_mig_state = IB_MIG_MIGRATED;
- memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
- qp->r_head_ack_queue = 0;
- qp->s_tail_ack_queue = 0;
- qp->s_num_rd_atomic = 0;
- if (qp->r_rq.wq) {
- qp->r_rq.wq->head = 0;
- qp->r_rq.wq->tail = 0;
- }
- qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
-{
- unsigned n;
-
- if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- qib_put_ss(&qp->s_rdma_read_sge);
-
- qib_put_ss(&qp->r_sge);
-
- if (clr_sends) {
- while (qp->s_last != qp->s_head) {
- struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
- unsigned i;
-
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct qib_sge *sge = &wqe->sg_list[i];
-
- qib_put_mr(sge->mr);
- }
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- }
- if (qp->s_rdma_mr) {
- qib_put_mr(qp->s_rdma_mr);
- qp->s_rdma_mr = NULL;
- }
- }
-
- if (qp->ibqp.qp_type != IB_QPT_RC)
- return;
+ struct qib_qp_priv *priv = qp->priv;
- for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
- struct qib_ack_entry *e = &qp->s_ack_queue[n];
-
- if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
- e->rdma_sge.mr) {
- qib_put_mr(e->rdma_sge.mr);
- e->rdma_sge.mr = NULL;
- }
- }
+ atomic_set(&priv->s_dma_busy, 0);
}
-/**
- * qib_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
+void qib_notify_error_qp(struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
- struct ib_wc wc;
- int ret = 0;
-
- if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
- goto bail;
-
- qp->state = IB_QPS_ERR;
-
- if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
- qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
-
- if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
- qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
- spin_lock(&dev->pending_lock);
- if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
- qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
- list_del_init(&qp->iowait);
+ spin_lock(&dev->rdi.pending_lock);
+ if (!list_empty(&priv->iowait) && !(qp->s_flags & RVT_S_BUSY)) {
+ qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+ list_del_init(&priv->iowait);
}
- spin_unlock(&dev->pending_lock);
+ spin_unlock(&dev->rdi.pending_lock);
- if (!(qp->s_flags & QIB_S_BUSY)) {
+ if (!(qp->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
- qib_put_mr(qp->s_rdma_mr);
+ rvt_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
- if (qp->s_tx) {
- qib_put_txreq(qp->s_tx);
- qp->s_tx = NULL;
+ if (priv->s_tx) {
+ qib_put_txreq(priv->s_tx);
+ priv->s_tx = NULL;
}
}
-
- /* Schedule the sending tasklet to drain the send work queue. */
- if (qp->s_last != qp->s_head)
- qib_schedule_send(qp);
-
- clear_mr_refs(qp, 0);
-
- memset(&wc, 0, sizeof(wc));
- wc.qp = &qp->ibqp;
- wc.opcode = IB_WC_RECV;
-
- if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
- wc.wr_id = qp->r_wr_id;
- wc.status = err;
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
- }
- wc.status = IB_WC_WR_FLUSH_ERR;
-
- if (qp->r_rq.wq) {
- struct qib_rwq *wq;
- u32 head;
- u32 tail;
-
- spin_lock(&qp->r_rq.lock);
-
- /* sanity check pointers before trusting them */
- wq = qp->r_rq.wq;
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- while (tail != head) {
- wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
- if (++tail >= qp->r_rq.size)
- tail = 0;
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
- }
- wq->tail = tail;
-
- spin_unlock(&qp->r_rq.lock);
- } else if (qp->ibqp.event_handler)
- ret = 1;
-
-bail:
- return ret;
}
-/**
- * qib_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int mtu_to_enum(u32 mtu)
{
- struct qib_ibdev *dev = to_idev(ibqp->device);
- struct qib_qp *qp = to_iqp(ibqp);
- enum ib_qp_state cur_state, new_state;
- struct ib_event ev;
- int lastwqe = 0;
- int mig = 0;
- int ret;
- u32 pmtu = 0; /* for gcc warning only */
-
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_lock);
-
- cur_state = attr_mask & IB_QP_CUR_STATE ?
- attr->cur_qp_state : qp->state;
- new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, IB_LINK_LAYER_UNSPECIFIED))
- goto inval;
-
- if (attr_mask & IB_QP_AV) {
- if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
- goto inval;
- if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
- goto inval;
- }
-
- if (attr_mask & IB_QP_ALT_PATH) {
- if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
- goto inval;
- if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
- goto inval;
- if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
- goto inval;
- }
-
- if (attr_mask & IB_QP_PKEY_INDEX)
- if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
- goto inval;
-
- if (attr_mask & IB_QP_MIN_RNR_TIMER)
- if (attr->min_rnr_timer > 31)
- goto inval;
-
- if (attr_mask & IB_QP_PORT)
- if (qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI ||
- attr->port_num == 0 ||
- attr->port_num > ibqp->device->phys_port_cnt)
- goto inval;
-
- if (attr_mask & IB_QP_DEST_QPN)
- if (attr->dest_qp_num > QIB_QPN_MASK)
- goto inval;
-
- if (attr_mask & IB_QP_RETRY_CNT)
- if (attr->retry_cnt > 7)
- goto inval;
-
- if (attr_mask & IB_QP_RNR_RETRY)
- if (attr->rnr_retry > 7)
- goto inval;
-
- /*
- * Don't allow invalid path_mtu values. OK to set greater
- * than the active mtu (or even the max_cap, if we have tuned
- * that to a small mtu. We'll set qp->path_mtu
- * to the lesser of requested attribute mtu and active,
- * for packetizing messages.
- * Note that the QP port has to be set in INIT and MTU in RTR.
- */
- if (attr_mask & IB_QP_PATH_MTU) {
- struct qib_devdata *dd = dd_from_dev(dev);
- int mtu, pidx = qp->port_num - 1;
-
- mtu = ib_mtu_enum_to_int(attr->path_mtu);
- if (mtu == -1)
- goto inval;
- if (mtu > dd->pport[pidx].ibmtu) {
- switch (dd->pport[pidx].ibmtu) {
- case 4096:
- pmtu = IB_MTU_4096;
- break;
- case 2048:
- pmtu = IB_MTU_2048;
- break;
- case 1024:
- pmtu = IB_MTU_1024;
- break;
- case 512:
- pmtu = IB_MTU_512;
- break;
- case 256:
- pmtu = IB_MTU_256;
- break;
- default:
- pmtu = IB_MTU_2048;
- }
- } else
- pmtu = attr->path_mtu;
- }
-
- if (attr_mask & IB_QP_PATH_MIG_STATE) {
- if (attr->path_mig_state == IB_MIG_REARM) {
- if (qp->s_mig_state == IB_MIG_ARMED)
- goto inval;
- if (new_state != IB_QPS_RTS)
- goto inval;
- } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
- if (qp->s_mig_state == IB_MIG_REARM)
- goto inval;
- if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
- goto inval;
- if (qp->s_mig_state == IB_MIG_ARMED)
- mig = 1;
- } else
- goto inval;
- }
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
- goto inval;
+ int enum_mtu;
- switch (new_state) {
- case IB_QPS_RESET:
- if (qp->state != IB_QPS_RESET) {
- qp->state = IB_QPS_RESET;
- spin_lock(&dev->pending_lock);
- if (!list_empty(&qp->iowait))
- list_del_init(&qp->iowait);
- spin_unlock(&dev->pending_lock);
- qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
- /* Stop the sending work queue and retry timer */
- cancel_work_sync(&qp->s_work);
- del_timer_sync(&qp->s_timer);
- wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
- if (qp->s_tx) {
- qib_put_txreq(qp->s_tx);
- qp->s_tx = NULL;
- }
- remove_qp(dev, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_lock);
- clear_mr_refs(qp, 1);
- qib_reset_qp(qp, ibqp->qp_type);
- }
+ switch (mtu) {
+ case 4096:
+ enum_mtu = IB_MTU_4096;
break;
-
- case IB_QPS_RTR:
- /* Allow event to retrigger if QP set to RTR more than once */
- qp->r_flags &= ~QIB_R_COMM_EST;
- qp->state = new_state;
+ case 2048:
+ enum_mtu = IB_MTU_2048;
break;
-
- case IB_QPS_SQD:
- qp->s_draining = qp->s_last != qp->s_cur;
- qp->state = new_state;
+ case 1024:
+ enum_mtu = IB_MTU_1024;
break;
-
- case IB_QPS_SQE:
- if (qp->ibqp.qp_type == IB_QPT_RC)
- goto inval;
- qp->state = new_state;
+ case 512:
+ enum_mtu = IB_MTU_512;
break;
-
- case IB_QPS_ERR:
- lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ case 256:
+ enum_mtu = IB_MTU_256;
break;
-
default:
- qp->state = new_state;
- break;
- }
-
- if (attr_mask & IB_QP_PKEY_INDEX)
- qp->s_pkey_index = attr->pkey_index;
-
- if (attr_mask & IB_QP_PORT)
- qp->port_num = attr->port_num;
-
- if (attr_mask & IB_QP_DEST_QPN)
- qp->remote_qpn = attr->dest_qp_num;
-
- if (attr_mask & IB_QP_SQ_PSN) {
- qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
- qp->s_psn = qp->s_next_psn;
- qp->s_sending_psn = qp->s_next_psn;
- qp->s_last_psn = qp->s_next_psn - 1;
- qp->s_sending_hpsn = qp->s_last_psn;
- }
-
- if (attr_mask & IB_QP_RQ_PSN)
- qp->r_psn = attr->rq_psn & QIB_PSN_MASK;
-
- if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->qp_access_flags = attr->qp_access_flags;
-
- if (attr_mask & IB_QP_AV) {
- qp->remote_ah_attr = attr->ah_attr;
- qp->s_srate = attr->ah_attr.static_rate;
- }
-
- if (attr_mask & IB_QP_ALT_PATH) {
- qp->alt_ah_attr = attr->alt_ah_attr;
- qp->s_alt_pkey_index = attr->alt_pkey_index;
- }
-
- if (attr_mask & IB_QP_PATH_MIG_STATE) {
- qp->s_mig_state = attr->path_mig_state;
- if (mig) {
- qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
- qp->s_pkey_index = qp->s_alt_pkey_index;
- }
- }
-
- if (attr_mask & IB_QP_PATH_MTU) {
- qp->path_mtu = pmtu;
- qp->pmtu = ib_mtu_enum_to_int(pmtu);
- }
-
- if (attr_mask & IB_QP_RETRY_CNT) {
- qp->s_retry_cnt = attr->retry_cnt;
- qp->s_retry = attr->retry_cnt;
- }
-
- if (attr_mask & IB_QP_RNR_RETRY) {
- qp->s_rnr_retry_cnt = attr->rnr_retry;
- qp->s_rnr_retry = attr->rnr_retry;
+ enum_mtu = IB_MTU_2048;
}
-
- if (attr_mask & IB_QP_MIN_RNR_TIMER)
- qp->r_min_rnr_timer = attr->min_rnr_timer;
-
- if (attr_mask & IB_QP_TIMEOUT) {
- qp->timeout = attr->timeout;
- qp->timeout_jiffies =
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
- 1000UL);
- }
-
- if (attr_mask & IB_QP_QKEY)
- qp->qkey = attr->qkey;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
- qp->s_max_rd_atomic = attr->max_rd_atomic;
-
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
-
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- insert_qp(dev, qp);
-
- if (lastwqe) {
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- if (mig) {
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_PATH_MIG;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- ret = 0;
- goto bail;
-
-inval:
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
- ret = -EINVAL;
-
-bail:
- return ret;
+ return enum_mtu;
}
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_qp_init_attr *init_attr)
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_attr *attr)
{
- struct qib_qp *qp = to_iqp(ibqp);
+ int mtu, pmtu, pidx = qp->port_num - 1;
+ struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+ verbs_dev);
+ mtu = ib_mtu_enum_to_int(attr->path_mtu);
+ if (mtu == -1)
+ return -EINVAL;
+
+ if (mtu > dd->pport[pidx].ibmtu)
+ pmtu = mtu_to_enum(dd->pport[pidx].ibmtu);
+ else
+ pmtu = attr->path_mtu;
+ return pmtu;
+}
- attr->qp_state = qp->state;
- attr->cur_qp_state = attr->qp_state;
- attr->path_mtu = qp->path_mtu;
- attr->path_mig_state = qp->s_mig_state;
- attr->qkey = qp->qkey;
- attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
- attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
- attr->dest_qp_num = qp->remote_qpn;
- attr->qp_access_flags = qp->qp_access_flags;
- attr->cap.max_send_wr = qp->s_size - 1;
- attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
- attr->cap.max_send_sge = qp->s_max_sge;
- attr->cap.max_recv_sge = qp->r_rq.max_sge;
- attr->cap.max_inline_data = 0;
- attr->ah_attr = qp->remote_ah_attr;
- attr->alt_ah_attr = qp->alt_ah_attr;
- attr->pkey_index = qp->s_pkey_index;
- attr->alt_pkey_index = qp->s_alt_pkey_index;
- attr->en_sqd_async_notify = 0;
- attr->sq_draining = qp->s_draining;
- attr->max_rd_atomic = qp->s_max_rd_atomic;
- attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
- attr->min_rnr_timer = qp->r_min_rnr_timer;
- attr->port_num = qp->port_num;
- attr->timeout = qp->timeout;
- attr->retry_cnt = qp->s_retry_cnt;
- attr->rnr_retry = qp->s_rnr_retry_cnt;
- attr->alt_port_num = qp->alt_ah_attr.port_num;
- attr->alt_timeout = qp->alt_timeout;
+int qib_mtu_to_path_mtu(u32 mtu)
+{
+ return mtu_to_enum(mtu);
+}
- init_attr->event_handler = qp->ibqp.event_handler;
- init_attr->qp_context = qp->ibqp.qp_context;
- init_attr->send_cq = qp->ibqp.send_cq;
- init_attr->recv_cq = qp->ibqp.recv_cq;
- init_attr->srq = qp->ibqp.srq;
- init_attr->cap = attr->cap;
- if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
- init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
- else
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->qp_type = qp->ibqp.qp_type;
- init_attr->port_num = qp->port_num;
- return 0;
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+ return ib_mtu_enum_to_int(pmtu);
}
/**
@@ -908,7 +324,7 @@ int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
*
* Returns the AETH.
*/
-__be32 qib_compute_aeth(struct qib_qp *qp)
+__be32 qib_compute_aeth(struct rvt_qp *qp)
{
u32 aeth = qp->r_msn & QIB_MSN_MASK;
@@ -921,7 +337,7 @@ __be32 qib_compute_aeth(struct qib_qp *qp)
} else {
u32 min, max, x;
u32 credits;
- struct qib_rwq *wq = qp->r_rq.wq;
+ struct rvt_rwq *wq = qp->r_rq.wq;
u32 head;
u32 tail;
@@ -962,315 +378,63 @@ __be32 qib_compute_aeth(struct qib_qp *qp)
return cpu_to_be32(aeth);
}
-/**
- * qib_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
{
- struct qib_qp *qp;
- int err;
- struct qib_swqe *swq = NULL;
- struct qib_ibdev *dev;
- struct qib_devdata *dd;
- size_t sz;
- size_t sg_list_sz;
- struct ib_qp *ret;
- gfp_t gfp;
+ struct qib_qp_priv *priv;
+ priv = kzalloc(sizeof(*priv), gfp);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+ priv->owner = qp;
- if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
- init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
- init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
- return ERR_PTR(-EINVAL);
-
- /* GFP_NOIO is applicable in RC QPs only */
- if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
- init_attr->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
-
- gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
- GFP_NOIO : GFP_KERNEL;
-
- /* Check receive queue parameters if no SRQ is specified. */
- if (!init_attr->srq) {
- if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
- init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- if (init_attr->cap.max_send_sge +
- init_attr->cap.max_send_wr +
- init_attr->cap.max_recv_sge +
- init_attr->cap.max_recv_wr == 0) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
+ priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp);
+ if (!priv->s_hdr) {
+ kfree(priv);
+ return ERR_PTR(-ENOMEM);
}
+ init_waitqueue_head(&priv->wait_dma);
+ INIT_WORK(&priv->s_work, _qib_do_send);
+ INIT_LIST_HEAD(&priv->iowait);
- switch (init_attr->qp_type) {
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- if (init_attr->port_num == 0 ||
- init_attr->port_num > ibpd->device->phys_port_cnt) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- case IB_QPT_UC:
- case IB_QPT_RC:
- case IB_QPT_UD:
- sz = sizeof(struct qib_sge) *
- init_attr->cap.max_send_sge +
- sizeof(struct qib_swqe);
- swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
- gfp, PAGE_KERNEL);
- if (swq == NULL) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
- sz = sizeof(*qp);
- sg_list_sz = 0;
- if (init_attr->srq) {
- struct qib_srq *srq = to_isrq(init_attr->srq);
-
- if (srq->rq.max_sge > 1)
- sg_list_sz = sizeof(*qp->r_sg_list) *
- (srq->rq.max_sge - 1);
- } else if (init_attr->cap.max_recv_sge > 1)
- sg_list_sz = sizeof(*qp->r_sg_list) *
- (init_attr->cap.max_recv_sge - 1);
- qp = kzalloc(sz + sg_list_sz, gfp);
- if (!qp) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_swq;
- }
- RCU_INIT_POINTER(qp->next, NULL);
- qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
- if (!qp->s_hdr) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
- }
- qp->timeout_jiffies =
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
- 1000UL);
- if (init_attr->srq)
- sz = 0;
- else {
- qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
- qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
- sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
- sizeof(struct qib_rwqe);
- if (gfp != GFP_NOIO)
- qp->r_rq.wq = vmalloc_user(
- sizeof(struct qib_rwq) +
- qp->r_rq.size * sz);
- else
- qp->r_rq.wq = __vmalloc(
- sizeof(struct qib_rwq) +
- qp->r_rq.size * sz,
- gfp, PAGE_KERNEL);
-
- if (!qp->r_rq.wq) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
- }
- }
-
- /*
- * ib_create_qp() will initialize qp->ibqp
- * except for qp->ibqp.qp_num.
- */
- spin_lock_init(&qp->r_lock);
- spin_lock_init(&qp->s_lock);
- spin_lock_init(&qp->r_rq.lock);
- atomic_set(&qp->refcount, 0);
- init_waitqueue_head(&qp->wait);
- init_waitqueue_head(&qp->wait_dma);
- init_timer(&qp->s_timer);
- qp->s_timer.data = (unsigned long)qp;
- INIT_WORK(&qp->s_work, qib_do_send);
- INIT_LIST_HEAD(&qp->iowait);
- INIT_LIST_HEAD(&qp->rspwait);
- qp->state = IB_QPS_RESET;
- qp->s_wq = swq;
- qp->s_size = init_attr->cap.max_send_wr + 1;
- qp->s_max_sge = init_attr->cap.max_send_sge;
- if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
- qp->s_flags = QIB_S_SIGNAL_REQ_WR;
- dev = to_idev(ibpd->device);
- dd = dd_from_dev(dev);
- err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
- init_attr->port_num, gfp);
- if (err < 0) {
- ret = ERR_PTR(err);
- vfree(qp->r_rq.wq);
- goto bail_qp;
- }
- qp->ibqp.qp_num = err;
- qp->port_num = init_attr->port_num;
- qib_reset_qp(qp, init_attr->qp_type);
- break;
-
- default:
- /* Don't support raw QPs */
- ret = ERR_PTR(-ENOSYS);
- goto bail;
- }
-
- init_attr->cap.max_inline_data = 0;
-
- /*
- * Return the address of the RWQ as the offset to mmap.
- * See qib_mmap() for details.
- */
- if (udata && udata->outlen >= sizeof(__u64)) {
- if (!qp->r_rq.wq) {
- __u64 offset = 0;
-
- err = ib_copy_to_udata(udata, &offset,
- sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_ip;
- }
- } else {
- u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;
-
- qp->ip = qib_create_mmap_info(dev, s,
- ibpd->uobject->context,
- qp->r_rq.wq);
- if (!qp->ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_ip;
- }
-
- err = ib_copy_to_udata(udata, &(qp->ip->offset),
- sizeof(qp->ip->offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_ip;
- }
- }
- }
-
- spin_lock(&dev->n_qps_lock);
- if (dev->n_qps_allocated == ib_qib_max_qps) {
- spin_unlock(&dev->n_qps_lock);
- ret = ERR_PTR(-ENOMEM);
- goto bail_ip;
- }
-
- dev->n_qps_allocated++;
- spin_unlock(&dev->n_qps_lock);
-
- if (qp->ip) {
- spin_lock_irq(&dev->pending_lock);
- list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
- }
-
- ret = &qp->ibqp;
- goto bail;
-
-bail_ip:
- if (qp->ip)
- kref_put(&qp->ip->ref, qib_release_mmap_info);
- else
- vfree(qp->r_rq.wq);
- free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
- kfree(qp->s_hdr);
- kfree(qp);
-bail_swq:
- vfree(swq);
-bail:
- return ret;
+ return priv;
}
-/**
- * qib_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int qib_destroy_qp(struct ib_qp *ibqp)
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
- struct qib_qp *qp = to_iqp(ibqp);
- struct qib_ibdev *dev = to_idev(ibqp->device);
+ struct qib_qp_priv *priv = qp->priv;
- /* Make sure HW and driver activity is stopped. */
- spin_lock_irq(&qp->s_lock);
- if (qp->state != IB_QPS_RESET) {
- qp->state = IB_QPS_RESET;
- spin_lock(&dev->pending_lock);
- if (!list_empty(&qp->iowait))
- list_del_init(&qp->iowait);
- spin_unlock(&dev->pending_lock);
- qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
- spin_unlock_irq(&qp->s_lock);
- cancel_work_sync(&qp->s_work);
- del_timer_sync(&qp->s_timer);
- wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
- if (qp->s_tx) {
- qib_put_txreq(qp->s_tx);
- qp->s_tx = NULL;
- }
- remove_qp(dev, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
- clear_mr_refs(qp, 1);
- } else
- spin_unlock_irq(&qp->s_lock);
+ kfree(priv->s_hdr);
+ kfree(priv);
+}
- /* all user's cleaned up, mark it available */
- free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
- spin_lock(&dev->n_qps_lock);
- dev->n_qps_allocated--;
- spin_unlock(&dev->n_qps_lock);
+void qib_stop_send_queue(struct rvt_qp *qp)
+{
+ struct qib_qp_priv *priv = qp->priv;
- if (qp->ip)
- kref_put(&qp->ip->ref, qib_release_mmap_info);
- else
- vfree(qp->r_rq.wq);
- vfree(qp->s_wq);
- kfree(qp->s_hdr);
- kfree(qp);
- return 0;
+ cancel_work_sync(&priv->s_work);
+ del_timer_sync(&qp->s_timer);
}
-/**
- * qib_init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
+void qib_quiesce_qp(struct rvt_qp *qp)
{
- spin_lock_init(&qpt->lock);
- qpt->last = 1; /* start with QPN 2 */
- qpt->nmaps = 1;
- qpt->mask = dd->qpn_mask;
+ struct qib_qp_priv *priv = qp->priv;
+
+ wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy));
+ if (priv->s_tx) {
+ qib_put_txreq(priv->s_tx);
+ priv->s_tx = NULL;
+ }
}
-/**
- * qib_free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_free_qpn_table(struct qib_qpn_table *qpt)
+void qib_flush_qp_waiters(struct rvt_qp *qp)
{
- int i;
+ struct qib_qp_priv *priv = qp->priv;
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
- for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
- if (qpt->map[i].page)
- free_page((unsigned long) qpt->map[i].page);
+ spin_lock(&dev->rdi.pending_lock);
+ if (!list_empty(&priv->iowait))
+ list_del_init(&priv->iowait);
+ spin_unlock(&dev->rdi.pending_lock);
}
/**
@@ -1280,7 +444,7 @@ void qib_free_qpn_table(struct qib_qpn_table *qpt)
*
* The QP s_lock should be held.
*/
-void qib_get_credit(struct qib_qp *qp, u32 aeth)
+void qib_get_credit(struct rvt_qp *qp, u32 aeth)
{
u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
@@ -1290,31 +454,70 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
* honor the credit field.
*/
if (credit == QIB_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
- if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+ qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
qib_schedule_send(qp);
}
}
- } else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
+ } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
/* Compute new LSN (i.e., MSN + credit) */
credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
if (qib_cmp24(credit, qp->s_lsn) > 0) {
qp->s_lsn = credit;
- if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
qib_schedule_send(qp);
}
}
}
}
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wr/wqe. This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ struct rvt_ah *ah;
+ int ret = 0;
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ if (wqe->length > 0x80000000U)
+ return -EINVAL;
+ break;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ if (wqe->length > (1 << ah->log_pmtu))
+ return -EINVAL;
+ /* progress hint */
+ ret = 1;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
#ifdef CONFIG_DEBUG_FS
struct qib_qp_iter {
struct qib_ibdev *dev;
- struct qib_qp *qp;
+ struct rvt_qp *qp;
int n;
};
@@ -1340,14 +543,14 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
struct qib_ibdev *dev = iter->dev;
int n = iter->n;
int ret = 1;
- struct qib_qp *pqp = iter->qp;
- struct qib_qp *qp;
+ struct rvt_qp *pqp = iter->qp;
+ struct rvt_qp *qp;
- for (; n < dev->qp_table_size; n++) {
+ for (; n < dev->rdi.qp_dev->qp_table_size; n++) {
if (pqp)
qp = rcu_dereference(pqp->next);
else
- qp = rcu_dereference(dev->qp_table[n]);
+ qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]);
pqp = qp;
if (qp) {
iter->qp = qp;
@@ -1364,10 +567,11 @@ static const char * const qp_type_str[] = {
void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
{
- struct qib_swqe *wqe;
- struct qib_qp *qp = iter->qp;
+ struct rvt_swqe *wqe;
+ struct rvt_qp *qp = iter->qp;
+ struct qib_qp_priv *priv = qp->priv;
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
seq_printf(s,
"N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
iter->n,
@@ -1377,8 +581,8 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
wqe->wr.opcode,
qp->s_hdrwords,
qp->s_flags,
- atomic_read(&qp->s_dma_busy),
- !list_empty(&qp->iowait),
+ atomic_read(&priv->s_dma_busy),
+ !list_empty(&priv->iowait),
qp->timeout,
wqe->ssn,
qp->s_lsn,
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index e6b7556d5221..9088e26d3ac8 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -40,7 +40,7 @@
static void rc_timeout(unsigned long arg);
-static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
u32 len;
@@ -54,9 +54,9 @@ static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
return wqe->length - len;
}
-static void start_timer(struct qib_qp *qp)
+static void start_timer(struct rvt_qp *qp)
{
- qp->s_flags |= QIB_S_TIMER;
+ qp->s_flags |= RVT_S_TIMER;
qp->s_timer.function = rc_timeout;
/* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies;
@@ -74,17 +74,17 @@ static void start_timer(struct qib_qp *qp)
* Note that we are in the responder's side of the QP context.
* Note the QP s_lock must be held.
*/
-static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
struct qib_other_headers *ohdr, u32 pmtu)
{
- struct qib_ack_entry *e;
+ struct rvt_ack_entry *e;
u32 hwords;
u32 len;
u32 bth0;
u32 bth2;
/* Don't send an ACK if we aren't supposed to. */
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->rdma_sge.mr) {
- qib_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
/* FALLTHROUGH */
@@ -112,7 +112,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
- if (qp->s_flags & QIB_S_ACK_PENDING)
+ if (qp->s_flags & RVT_S_ACK_PENDING)
goto normal;
goto bail;
}
@@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
/* Copy SGE state in case we need to resend */
qp->s_rdma_mr = e->rdma_sge.mr;
if (qp->s_rdma_mr)
- qib_get_mr(qp->s_rdma_mr);
+ rvt_get_mr(qp->s_rdma_mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
qp->s_cur_sge = &qp->s_ack_rdma_sge;
qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
if (qp->s_rdma_mr)
- qib_get_mr(qp->s_rdma_mr);
+ rvt_get_mr(qp->s_rdma_mr);
len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
@@ -196,7 +196,7 @@ normal:
* (see above).
*/
qp->s_ack_state = OP(SEND_ONLY);
- qp->s_flags &= ~QIB_S_ACK_PENDING;
+ qp->s_flags &= ~RVT_S_ACK_PENDING;
qp->s_cur_sge = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
@@ -218,7 +218,7 @@ normal:
bail:
qp->s_ack_state = OP(ACKNOWLEDGE);
- qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+ qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
return 0;
}
@@ -226,63 +226,60 @@ bail:
* qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
*
+ * Assumes the s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_rc_req(struct qib_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
struct qib_other_headers *ohdr;
- struct qib_sge_state *ss;
- struct qib_swqe *wqe;
+ struct rvt_sge_state *ss;
+ struct rvt_swqe *wqe;
u32 hwords;
u32 len;
u32 bth0;
u32 bth2;
u32 pmtu = qp->pmtu;
char newreq;
- unsigned long flags;
int ret = 0;
int delta;
- ohdr = &qp->s_hdr->u.oth;
+ ohdr = &priv->s_hdr->u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr->u.l.oth;
-
- /*
- * The lock is needed to synchronize between the sending tasklet,
- * the receive interrupt handler, and timeout resends.
- */
- spin_lock_irqsave(&qp->s_lock, flags);
+ ohdr = &priv->s_hdr->u.l.oth;
/* Sending responses has higher priority over sending requests. */
- if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+ if ((qp->s_flags & RVT_S_RESP_PENDING) &&
qib_make_rc_ack(dev, qp, ohdr, pmtu))
goto done;
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
- if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
- if (atomic_read(&qp->s_dma_busy)) {
- qp->s_flags |= QIB_S_WAIT_DMA;
+ if (atomic_read(&priv->s_dma_busy)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done;
}
- if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+ if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
goto bail;
if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
- qp->s_flags |= QIB_S_WAIT_PSN;
+ qp->s_flags |= RVT_S_WAIT_PSN;
goto bail;
}
qp->s_sending_psn = qp->s_psn;
@@ -294,10 +291,10 @@ int qib_make_rc_req(struct qib_qp *qp)
bth0 = 0;
/* Send a request. */
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) {
default:
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/*
* Resend an old request or start a new one.
@@ -317,11 +314,11 @@ int qib_make_rc_req(struct qib_qp *qp)
*/
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
qp->s_num_rd_atomic) {
- qp->s_flags |= QIB_S_WAIT_FENCE;
+ qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
- wqe->psn = qp->s_next_psn;
newreq = 1;
+ qp->s_psn = wqe->psn;
}
/*
* Note that we have to be careful not to modify the
@@ -335,14 +332,12 @@ int qib_make_rc_req(struct qib_qp *qp)
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+ qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
@@ -363,14 +358,14 @@ int qib_make_rc_req(struct qib_qp *qp)
break;
case IB_WR_RDMA_WRITE:
- if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+ qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -380,9 +375,7 @@ int qib_make_rc_req(struct qib_qp *qp)
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
@@ -411,19 +404,12 @@ int qib_make_rc_req(struct qib_qp *qp)
if (newreq) {
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
- qp->s_flags |= QIB_S_WAIT_RDMAR;
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
- if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- /*
- * Adjust s_next_psn to count the
- * expected number of responses.
- */
- if (len > pmtu)
- qp->s_next_psn += (len - 1) / pmtu;
- wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
@@ -449,13 +435,12 @@ int qib_make_rc_req(struct qib_qp *qp)
if (newreq) {
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
- qp->s_flags |= QIB_S_WAIT_RDMAR;
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
- if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- wqe->lpsn = wqe->psn;
}
if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
@@ -498,11 +483,8 @@ int qib_make_rc_req(struct qib_qp *qp)
}
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1;
- else {
+ else
qp->s_psn++;
- if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
- }
break;
case OP(RDMA_READ_RESPONSE_FIRST):
@@ -522,8 +504,6 @@ int qib_make_rc_req(struct qib_qp *qp)
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = qp->s_psn++ & QIB_PSN_MASK;
- if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -563,8 +543,6 @@ int qib_make_rc_req(struct qib_qp *qp)
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = qp->s_psn++ & QIB_PSN_MASK;
- if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -618,9 +596,9 @@ int qib_make_rc_req(struct qib_qp *qp)
delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
if (delta && delta % QIB_PSN_CREDIT == 0)
bth2 |= IB_BTH_REQ_ACK;
- if (qp->s_flags & QIB_S_SEND_ONE) {
- qp->s_flags &= ~QIB_S_SEND_ONE;
- qp->s_flags |= QIB_S_WAIT_ACK;
+ if (qp->s_flags & RVT_S_SEND_ONE) {
+ qp->s_flags &= ~RVT_S_SEND_ONE;
+ qp->s_flags |= RVT_S_WAIT_ACK;
bth2 |= IB_BTH_REQ_ACK;
}
qp->s_len -= len;
@@ -629,13 +607,9 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_cur_size = len;
qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
- qp->s_flags &= ~QIB_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->s_flags &= ~RVT_S_BUSY;
return ret;
}
@@ -647,7 +621,7 @@ unlock:
* Note that RDMA reads and atomics are handled in the
* send side QP state and tasklet.
*/
-void qib_send_rc_ack(struct qib_qp *qp)
+void qib_send_rc_ack(struct rvt_qp *qp)
{
struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -665,11 +639,11 @@ void qib_send_rc_ack(struct qib_qp *qp)
spin_lock_irqsave(&qp->s_lock, flags);
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto unlock;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
- if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+ if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
goto queue_ack;
/* Construct the header with s_lock held so APM doesn't change it. */
@@ -758,9 +732,9 @@ void qib_send_rc_ack(struct qib_qp *qp)
goto done;
queue_ack:
- if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
- ibp->n_rc_qacks++;
- qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+ this_cpu_inc(*ibp->rvp.rc_qacks);
+ qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
@@ -782,10 +756,10 @@ done:
* for the given QP.
* Called at interrupt level with the QP s_lock held.
*/
-static void reset_psn(struct qib_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
{
u32 n = qp->s_acked;
- struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
qp->s_cur = n;
@@ -808,7 +782,7 @@ static void reset_psn(struct qib_qp *qp, u32 psn)
n = 0;
if (n == qp->s_tail)
break;
- wqe = get_swqe_ptr(qp, n);
+ wqe = rvt_get_swqe_ptr(qp, n);
diff = qib_cmp24(psn, wqe->psn);
if (diff < 0)
break;
@@ -854,22 +828,22 @@ static void reset_psn(struct qib_qp *qp, u32 psn)
done:
qp->s_psn = psn;
/*
- * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+ * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
* asynchronously before the send tasklet can get scheduled.
* Doing it in qib_make_rc_req() is too late.
*/
if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
(qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
- qp->s_flags |= QIB_S_WAIT_PSN;
+ qp->s_flags |= RVT_S_WAIT_PSN;
}
/*
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
- struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct qib_ibport *ibp;
if (qp->s_retry == 0) {
@@ -878,7 +852,7 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
- qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else /* XXX need to handle delayed completion */
return;
@@ -887,15 +861,15 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
ibp = to_iport(qp->ibqp.device, qp->port_num);
if (wqe->wr.opcode == IB_WR_RDMA_READ)
- ibp->n_rc_resends++;
+ ibp->rvp.n_rc_resends++;
else
- ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+ ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
- qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
- QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
- QIB_S_WAIT_ACK);
+ qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+ RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+ RVT_S_WAIT_ACK);
if (wait)
- qp->s_flags |= QIB_S_SEND_ONE;
+ qp->s_flags |= RVT_S_SEND_ONE;
reset_psn(qp, psn);
}
@@ -904,16 +878,16 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
*/
static void rc_timeout(unsigned long arg)
{
- struct qib_qp *qp = (struct qib_qp *)arg;
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
struct qib_ibport *ibp;
unsigned long flags;
spin_lock_irqsave(&qp->r_lock, flags);
spin_lock(&qp->s_lock);
- if (qp->s_flags & QIB_S_TIMER) {
+ if (qp->s_flags & RVT_S_TIMER) {
ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->n_rc_timeouts++;
- qp->s_flags &= ~QIB_S_TIMER;
+ ibp->rvp.n_rc_timeouts++;
+ qp->s_flags &= ~RVT_S_TIMER;
del_timer(&qp->s_timer);
qib_restart_rc(qp, qp->s_last_psn + 1, 1);
qib_schedule_send(qp);
@@ -927,12 +901,12 @@ static void rc_timeout(unsigned long arg)
*/
void qib_rc_rnr_retry(unsigned long arg)
{
- struct qib_qp *qp = (struct qib_qp *)arg;
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
unsigned long flags;
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & QIB_S_WAIT_RNR) {
- qp->s_flags &= ~QIB_S_WAIT_RNR;
+ if (qp->s_flags & RVT_S_WAIT_RNR) {
+ qp->s_flags &= ~RVT_S_WAIT_RNR;
del_timer(&qp->s_timer);
qib_schedule_send(qp);
}
@@ -943,14 +917,14 @@ void qib_rc_rnr_retry(unsigned long arg)
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
*/
-static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
{
- struct qib_swqe *wqe;
+ struct rvt_swqe *wqe;
u32 n = qp->s_last;
/* Find the work request corresponding to the given PSN. */
for (;;) {
- wqe = get_swqe_ptr(qp, n);
+ wqe = rvt_get_swqe_ptr(qp, n);
if (qib_cmp24(psn, wqe->lpsn) <= 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_sending_psn = wqe->lpsn + 1;
@@ -968,16 +942,16 @@ static void reset_sending_psn(struct qib_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
{
struct qib_other_headers *ohdr;
- struct qib_swqe *wqe;
+ struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
/* Find out where the BTH is */
@@ -1002,22 +976,30 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
* there are still requests that haven't been acked.
*/
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
- !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
- (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+ (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
start_timer(qp);
while (qp->s_last != qp->s_acked) {
- wqe = get_swqe_ptr(qp, qp->s_last);
+ u32 s_last;
+
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
+ s_last = qp->s_last;
+ if (++s_last >= qp->s_size)
+ s_last = 0;
+ qp->s_last = s_last;
+ /* see post_send() */
+ barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
- struct qib_sge *sge = &wqe->sg_list[i];
+ struct rvt_sge *sge = &wqe->sg_list[i];
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
@@ -1025,25 +1007,23 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
- qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
}
/*
* If we were waiting for sends to complete before resending,
* and they are now complete, restart sending.
*/
- if (qp->s_flags & QIB_S_WAIT_PSN &&
+ if (qp->s_flags & RVT_S_WAIT_PSN &&
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
- qp->s_flags &= ~QIB_S_WAIT_PSN;
+ qp->s_flags &= ~RVT_S_WAIT_PSN;
qp->s_sending_psn = qp->s_psn;
qp->s_sending_hpsn = qp->s_psn - 1;
qib_schedule_send(qp);
}
}
-static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
{
qp->s_last_psn = psn;
}
@@ -1053,8 +1033,8 @@ static inline void update_last_psn(struct qib_qp *qp, u32 psn)
* This is similar to qib_send_complete but has to check to be sure
* that the SGEs are not being referenced if the SWQE is being resent.
*/
-static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
- struct qib_swqe *wqe,
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
struct qib_ibport *ibp)
{
struct ib_wc wc;
@@ -1067,13 +1047,21 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
*/
if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+ u32 s_last;
+
for (i = 0; i < wqe->wr.num_sge; i++) {
- struct qib_sge *sge = &wqe->sg_list[i];
+ struct rvt_sge *sge = &wqe->sg_list[i];
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
+ s_last = qp->s_last;
+ if (++s_last >= qp->s_size)
+ s_last = 0;
+ qp->s_last = s_last;
+ /* see post_send() */
+ barrier();
/* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
@@ -1081,12 +1069,10 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
- qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
} else
- ibp->n_rc_delayed_comp++;
+ this_cpu_inc(*ibp->rvp.rc_delayed_comp);
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, wqe->lpsn);
@@ -1100,7 +1086,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
qp->s_acked = qp->s_cur;
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
if (qp->s_acked != qp->s_tail) {
qp->s_state = OP(SEND_LAST);
qp->s_psn = wqe->psn;
@@ -1110,7 +1096,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
qp->s_acked = 0;
if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
qp->s_draining = 0;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
}
return wqe;
}
@@ -1126,19 +1112,19 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
* Called at interrupt level with the QP s_lock held.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
-static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u64 val, struct qib_ctxtdata *rcd)
{
struct qib_ibport *ibp;
enum ib_wc_status status;
- struct qib_swqe *wqe;
+ struct rvt_swqe *wqe;
int ret = 0;
u32 ack_psn;
int diff;
/* Remove QP from retry timer */
- if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
- qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
del_timer(&qp->s_timer);
}
@@ -1151,7 +1137,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
ack_psn = psn;
if (aeth >> 29)
ack_psn--;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
ibp = to_iport(qp->ibqp.device, qp->port_num);
/*
@@ -1186,11 +1172,11 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
/* Retry this request. */
- if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
- qp->r_flags |= QIB_R_RDMAR_SEQ;
+ if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= QIB_R_RSP_SEND;
+ qp->r_flags |= RVT_R_RSP_SEND;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
@@ -1213,14 +1199,14 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
qp->s_num_rd_atomic--;
/* Restart sending task if fence is complete */
- if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+ if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
!qp->s_num_rd_atomic) {
- qp->s_flags &= ~(QIB_S_WAIT_FENCE |
- QIB_S_WAIT_ACK);
+ qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+ RVT_S_WAIT_ACK);
qib_schedule_send(qp);
- } else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
- qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
- QIB_S_WAIT_ACK);
+ } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+ qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+ RVT_S_WAIT_ACK);
qib_schedule_send(qp);
}
}
@@ -1231,7 +1217,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
switch (aeth >> 29) {
case 0: /* ACK */
- ibp->n_rc_acks++;
+ this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
/*
* We are expecting more ACKs so
@@ -1248,8 +1234,8 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
qp->s_state = OP(SEND_LAST);
qp->s_psn = psn + 1;
}
- if (qp->s_flags & QIB_S_WAIT_ACK) {
- qp->s_flags &= ~QIB_S_WAIT_ACK;
+ if (qp->s_flags & RVT_S_WAIT_ACK) {
+ qp->s_flags &= ~RVT_S_WAIT_ACK;
qib_schedule_send(qp);
}
qib_get_credit(qp, aeth);
@@ -1260,10 +1246,10 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
goto bail;
case 1: /* RNR NAK */
- ibp->n_rnr_naks++;
+ ibp->rvp.n_rnr_naks++;
if (qp->s_acked == qp->s_tail)
goto bail;
- if (qp->s_flags & QIB_S_WAIT_RNR)
+ if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail;
if (qp->s_rnr_retry == 0) {
status = IB_WC_RNR_RETRY_EXC_ERR;
@@ -1275,12 +1261,12 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+ ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
reset_psn(qp, psn);
- qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
- qp->s_flags |= QIB_S_WAIT_RNR;
+ qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+ qp->s_flags |= RVT_S_WAIT_RNR;
qp->s_timer.function = qib_rc_rnr_retry;
qp->s_timer.expires = jiffies + usecs_to_jiffies(
ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
@@ -1296,7 +1282,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
QIB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
- ibp->n_seq_naks++;
+ ibp->rvp.n_seq_naks++;
/*
* Back up to the responder's expected PSN.
* Note that we might get a NAK in the middle of an
@@ -1309,21 +1295,21 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
case 1: /* Invalid Request */
status = IB_WC_REM_INV_REQ_ERR;
- ibp->n_other_naks++;
+ ibp->rvp.n_other_naks++;
goto class_b;
case 2: /* Remote Access Error */
status = IB_WC_REM_ACCESS_ERR;
- ibp->n_other_naks++;
+ ibp->rvp.n_other_naks++;
goto class_b;
case 3: /* Remote Operation Error */
status = IB_WC_REM_OP_ERR;
- ibp->n_other_naks++;
+ ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
qib_send_complete(qp, wqe, status);
- qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1349,18 +1335,18 @@ bail:
* We have seen an out of sequence RDMA read middle or last packet.
* This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
*/
-static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
struct qib_ctxtdata *rcd)
{
- struct qib_swqe *wqe;
+ struct rvt_swqe *wqe;
/* Remove QP from retry timer */
- if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
- qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
del_timer(&qp->s_timer);
}
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
while (qib_cmp24(psn, wqe->lpsn) > 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1370,11 +1356,11 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
wqe = do_rc_completion(qp, wqe, ibp);
}
- ibp->n_rdma_seq++;
- qp->r_flags |= QIB_R_RDMAR_SEQ;
+ ibp->rvp.n_rdma_seq++;
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= QIB_R_RSP_SEND;
+ qp->r_flags |= RVT_R_RSP_SEND;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
@@ -1399,12 +1385,12 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
static void qib_rc_rcv_resp(struct qib_ibport *ibp,
struct qib_other_headers *ohdr,
void *data, u32 tlen,
- struct qib_qp *qp,
+ struct rvt_qp *qp,
u32 opcode,
u32 psn, u32 hdrsize, u32 pmtu,
struct qib_ctxtdata *rcd)
{
- struct qib_swqe *wqe;
+ struct rvt_swqe *wqe;
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
enum ib_wc_status status;
unsigned long flags;
@@ -1425,7 +1411,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
* If send tasklet not running attempt to progress
* SDMA queue.
*/
- if (!(qp->s_flags & QIB_S_BUSY)) {
+ if (!(qp->s_flags & RVT_S_BUSY)) {
/* Acquire SDMA Lock */
spin_lock_irqsave(&ppd->sdma_lock, flags);
/* Invoke sdma make progress */
@@ -1437,11 +1423,12 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
}
spin_lock_irqsave(&qp->s_lock, flags);
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto ack_done;
/* Ignore invalid responses. */
- if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1460,15 +1447,15 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
* Skip everything other than the PSN we expect, if we are waiting
* for a reply to a restarted RDMA read or atomic op.
*/
- if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+ if (qp->r_flags & RVT_R_RDMAR_SEQ) {
if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
goto ack_done;
- qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+ qp->r_flags &= ~RVT_R_RDMAR_SEQ;
}
if (unlikely(qp->s_acked == qp->s_tail))
goto ack_done;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
status = IB_WC_SUCCESS;
switch (opcode) {
@@ -1487,7 +1474,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
hdrsize += 4;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/*
@@ -1515,10 +1502,10 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= QIB_S_TIMER;
+ qp->s_flags |= RVT_S_TIMER;
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
- if (qp->s_flags & QIB_S_WAIT_ACK) {
- qp->s_flags &= ~QIB_S_WAIT_ACK;
+ if (qp->s_flags & RVT_S_WAIT_ACK) {
+ qp->s_flags &= ~RVT_S_WAIT_ACK;
qib_schedule_send(qp);
}
@@ -1553,7 +1540,7 @@ read_middle:
* have to be careful to copy the data to the right
* location.
*/
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
wqe, psn, pmtu);
goto read_last;
@@ -1598,7 +1585,7 @@ ack_len_err:
ack_err:
if (qp->s_last == qp->s_acked) {
qib_send_complete(qp, wqe, status);
- qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1623,14 +1610,14 @@ bail:
*/
static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
void *data,
- struct qib_qp *qp,
+ struct rvt_qp *qp,
u32 opcode,
u32 psn,
int diff,
struct qib_ctxtdata *rcd)
{
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct qib_ack_entry *e;
+ struct rvt_ack_entry *e;
unsigned long flags;
u8 i, prev;
int old_req;
@@ -1642,7 +1629,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
* Don't queue the NAK if we already sent one.
*/
if (!qp->r_nak_state) {
- ibp->n_rc_seqnak++;
+ ibp->rvp.n_rc_seqnak++;
qp->r_nak_state = IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
qp->r_ack_psn = qp->r_psn;
@@ -1652,7 +1639,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
* Otherwise, we end up propagating congestion.
*/
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= QIB_R_RSP_NAK;
+ qp->r_flags |= RVT_R_RSP_NAK;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
@@ -1678,7 +1665,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
*/
e = NULL;
old_req = 1;
- ibp->n_rc_dupreq++;
+ ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1732,7 +1719,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
if (unlikely(offset + len != e->rdma_sge.sge_length))
goto unlock_done;
if (e->rdma_sge.mr) {
- qib_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
if (len != 0) {
@@ -1740,7 +1727,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
- ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+ ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
goto unlock_done;
@@ -1791,7 +1778,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
* which doesn't accept a RDMA read response or atomic
* response as an ACK for earlier SENDs or RDMA writes.
*/
- if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+ if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
qp->r_nak_state = 0;
qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
@@ -1805,7 +1792,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
break;
}
qp->s_ack_state = OP(ACKNOWLEDGE);
- qp->s_flags |= QIB_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_RESP_PENDING;
qp->r_nak_state = 0;
qib_schedule_send(qp);
@@ -1818,13 +1805,13 @@ send_ack:
return 0;
}
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
{
unsigned long flags;
int lastwqe;
spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = qib_error_qp(qp, err);
+ lastwqe = rvt_error_qp(qp, err);
spin_unlock_irqrestore(&qp->s_lock, flags);
if (lastwqe) {
@@ -1837,7 +1824,7 @@ void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
}
}
-static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
@@ -1862,7 +1849,7 @@ static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
* Called at interrupt level.
*/
void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
struct qib_other_headers *ohdr;
@@ -1948,8 +1935,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
break;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
- qp->r_flags |= QIB_R_COMM_EST;
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+ qp->r_flags |= RVT_R_COMM_EST;
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -2026,9 +2013,9 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- qib_put_ss(&qp->r_sge);
+ rvt_put_ss(&qp->r_sge);
qp->r_msn++;
- if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -2047,7 +2034,7 @@ send_last:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
break;
@@ -2069,7 +2056,7 @@ send_last:
int ok;
/* Check rkey & NAK */
- ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+ ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto nack_acc;
@@ -2096,7 +2083,7 @@ send_last:
goto send_last;
case OP(RDMA_READ_REQUEST): {
- struct qib_ack_entry *e;
+ struct rvt_ack_entry *e;
u32 len;
u8 next;
@@ -2114,7 +2101,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- qib_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
reth = &ohdr->u.rc.reth;
@@ -2125,7 +2112,7 @@ send_last:
int ok;
/* Check rkey & NAK */
- ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
goto nack_acc_unlck;
@@ -2157,7 +2144,7 @@ send_last:
qp->r_head_ack_queue = next;
/* Schedule the send tasklet. */
- qp->s_flags |= QIB_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_RESP_PENDING;
qib_schedule_send(qp);
goto sunlock;
@@ -2166,7 +2153,7 @@ send_last:
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
- struct qib_ack_entry *e;
+ struct rvt_ack_entry *e;
u64 vaddr;
atomic64_t *maddr;
u64 sdata;
@@ -2186,7 +2173,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- qib_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
@@ -2196,7 +2183,7 @@ send_last:
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
- if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc_unlck;
@@ -2208,7 +2195,7 @@ send_last:
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
be64_to_cpu(ateth->compare_data),
sdata);
- qib_put_mr(qp->r_sge.sge.mr);
+ rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
e->opcode = opcode;
e->sent = 0;
@@ -2221,7 +2208,7 @@ send_last:
qp->r_head_ack_queue = next;
/* Schedule the send tasklet. */
- qp->s_flags |= QIB_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_RESP_PENDING;
qib_schedule_send(qp);
goto sunlock;
@@ -2245,7 +2232,7 @@ rnr_nak:
qp->r_ack_psn = qp->r_psn;
/* Queue RNR NAK for later */
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= QIB_R_RSP_NAK;
+ qp->r_flags |= RVT_R_RSP_NAK;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
@@ -2257,7 +2244,7 @@ nack_op_err:
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= QIB_R_RSP_NAK;
+ qp->r_flags |= RVT_R_RSP_NAK;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
@@ -2271,7 +2258,7 @@ nack_inv:
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= QIB_R_RSP_NAK;
+ qp->r_flags |= RVT_R_RSP_NAK;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b1aa21bdd484..a5f07a64b228 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -79,16 +79,16 @@ const u32 ib_qib_rnr_table[32] = {
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
-static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
int i, j, ret;
struct ib_wc wc;
- struct qib_lkey_table *rkt;
- struct qib_pd *pd;
- struct qib_sge_state *ss;
+ struct rvt_lkey_table *rkt;
+ struct rvt_pd *pd;
+ struct rvt_sge_state *ss;
- rkt = &to_idev(qp->ibqp.device)->lk_table;
- pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+ rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+ pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
ss = &qp->r_sge;
ss->sg_list = qp->r_sg_list;
qp->r_len = 0;
@@ -96,7 +96,7 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
- if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
@@ -109,9 +109,9 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
bad_lkey:
while (j) {
- struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+ struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
ss->num_sge = 0;
memset(&wc, 0, sizeof(wc));
@@ -120,7 +120,7 @@ bad_lkey:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
/* Signal solicited completion event. */
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
bail:
return ret;
@@ -136,19 +136,19 @@ bail:
*
* Can be called from interrupt level.
*/
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
unsigned long flags;
- struct qib_rq *rq;
- struct qib_rwq *wq;
- struct qib_srq *srq;
- struct qib_rwqe *wqe;
+ struct rvt_rq *rq;
+ struct rvt_rwq *wq;
+ struct rvt_srq *srq;
+ struct rvt_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
u32 tail;
int ret;
if (qp->ibqp.srq) {
- srq = to_isrq(qp->ibqp.srq);
+ srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
@@ -158,7 +158,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
@@ -174,7 +174,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
}
/* Make sure entry is read after head index is read. */
smp_rmb();
- wqe = get_rwqe_ptr(rq, tail);
+ wqe = rvt_get_rwqe_ptr(rq, tail);
/*
* Even though we update the tail index in memory, the verbs
* consumer is not supposed to post more entries until a
@@ -190,7 +190,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
qp->r_wr_id = wqe->wr_id;
ret = 1;
- set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+ set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
@@ -227,7 +227,7 @@ bail:
* Switch to alternate path.
* The QP s_lock should be held and interrupts disabled.
*/
-void qib_migrate_qp(struct qib_qp *qp)
+void qib_migrate_qp(struct rvt_qp *qp)
{
struct ib_event ev;
@@ -266,7 +266,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
* The s_lock will be acquired around the qib_migrate_qp() call.
*/
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
- int has_grh, struct qib_qp *qp, u32 bth0)
+ int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
unsigned long flags;
@@ -279,7 +279,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
goto err;
guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ if (!gid_ok(&hdr->u.l.grh.dgid,
+ ibp->rvp.gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
@@ -311,7 +312,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
goto err;
guid = get_sguid(ibp,
qp->remote_ah_attr.grh.sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ if (!gid_ok(&hdr->u.l.grh.dgid,
+ ibp->rvp.gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
@@ -353,12 +355,15 @@ err:
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
-static void qib_ruc_loopback(struct qib_qp *sqp)
+static void qib_ruc_loopback(struct rvt_qp *sqp)
{
struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct qib_qp *qp;
- struct qib_swqe *wqe;
- struct qib_sge *sge;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = ppd->dd;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct rvt_qp *qp;
+ struct rvt_swqe *wqe;
+ struct rvt_sge *sge;
unsigned long flags;
struct ib_wc wc;
u64 sdata;
@@ -367,29 +372,33 @@ static void qib_ruc_loopback(struct qib_qp *sqp)
int release;
int ret;
+ rcu_read_lock();
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
- qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
+ if (!qp)
+ goto done;
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
- !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+ !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
goto unlock;
- sqp->s_flags |= QIB_S_BUSY;
+ sqp->s_flags |= RVT_S_BUSY;
again:
- if (sqp->s_last == sqp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
goto clr_busy;
- wqe = get_swqe_ptr(sqp, sqp->s_last);
+ wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work reqeust. */
- if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
@@ -407,9 +416,9 @@ again:
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+ if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
@@ -458,7 +467,7 @@ again:
goto inv_err;
if (wqe->length == 0)
break;
- if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr,
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
@@ -471,7 +480,7 @@ again:
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
- if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+ if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr,
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
@@ -489,7 +498,7 @@ again:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
- if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
@@ -502,7 +511,7 @@ again:
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
sdata, wqe->atomic_wr.swap);
- qib_put_mr(qp->r_sge.sge.mr);
+ rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -526,11 +535,11 @@ again:
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -543,9 +552,9 @@ again:
sqp->s_len -= len;
}
if (release)
- qib_put_ss(&qp->r_sge);
+ rvt_put_ss(&qp->r_sge);
- if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -561,12 +570,12 @@ again:
wc.sl = qp->remote_ah_attr.sl;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->n_loop_pkts++;
+ ibp->rvp.n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
qib_send_complete(sqp, wqe, send_status);
@@ -576,7 +585,7 @@ rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
- ibp->n_rnr_naks++;
+ ibp->rvp.n_rnr_naks++;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
@@ -588,9 +597,9 @@ rnr_nak:
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
- sqp->s_flags |= QIB_S_WAIT_RNR;
+ sqp->s_flags |= RVT_S_WAIT_RNR;
sqp->s_timer.function = qib_rc_rnr_retry;
sqp->s_timer.expires = jiffies +
usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
@@ -618,9 +627,9 @@ serr:
spin_lock_irqsave(&sqp->s_lock, flags);
qib_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+ int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
- sqp->s_flags &= ~QIB_S_BUSY;
+ sqp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
@@ -633,12 +642,11 @@ serr:
goto done;
}
clr_busy:
- sqp->s_flags &= ~QIB_S_BUSY;
+ sqp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
- if (qp && atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rcu_read_unlock();
}
/**
@@ -663,7 +671,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
hdr->next_hdr = IB_GRH_NEXT_HDR;
hdr->hop_limit = grh->hop_limit;
/* The SGID is 32-bit aligned. */
- hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
hdr->sgid.global.interface_id = grh->sgid_index ?
ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
hdr->dgid = grh->dgid;
@@ -672,9 +680,10 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
u32 bth0, u32 bth2)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
u16 lrh0;
u32 nwords;
@@ -685,17 +694,18 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = QIB_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+ qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
&qp->remote_ah_attr.grh,
qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
}
lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
qp->remote_ah_attr.sl << 4;
- qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+ priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ priv->s_hdr->lrh[2] =
+ cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
qp->remote_ah_attr.src_path_bits);
bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
@@ -707,20 +717,29 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}
+void _qib_do_send(struct work_struct *work)
+{
+ struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
+ s_work);
+ struct rvt_qp *qp = priv->owner;
+
+ qib_do_send(qp);
+}
+
/**
* qib_do_send - perform a send on a QP
- * @work: contains a pointer to the QP
+ * @qp: pointer to the QP
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP (tasklet).
* Otherwise, two threads could send packets out of order.
*/
-void qib_do_send(struct work_struct *work)
+void qib_do_send(struct rvt_qp *qp)
{
- struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+ struct qib_qp_priv *priv = qp->priv;
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- int (*make_req)(struct qib_qp *qp);
+ int (*make_req)(struct rvt_qp *qp);
unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -745,50 +764,59 @@ void qib_do_send(struct work_struct *work)
return;
}
- qp->s_flags |= QIB_S_BUSY;
-
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->s_flags |= RVT_S_BUSY;
do {
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
/*
* If the packet cannot be sent now, return and
* the send tasklet will be woken up later.
*/
- if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
+ if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size))
- break;
+ return;
/* Record that s_hdr is empty. */
qp->s_hdrwords = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
}
} while (make_req(qp));
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
/*
* This should be called with s_lock held.
*/
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
unsigned i;
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
+ last = qp->s_last;
+ old_last = last;
+ if (++last >= qp->s_size)
+ last = 0;
+ qp->s_last = last;
+ /* See post_send() */
+ barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
- struct qib_sge *sge = &wqe->sg_list[i];
+ struct rvt_sge *sge = &wqe->sg_list[i];
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+ atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
@@ -800,15 +828,10 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
wc.qp = &qp->ibqp;
if (status == IB_WC_SUCCESS)
wc.byte_len = wqe->length;
- qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
status != IB_WC_SUCCESS);
}
- last = qp->s_last;
- old_last = last;
- if (++last >= qp->s_size)
- last = 0;
- qp->s_last = last;
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index c6d6a54d2e19..891873b38a1e 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -513,7 +513,9 @@ int qib_sdma_running(struct qib_pportdata *ppd)
static void complete_sdma_err_req(struct qib_pportdata *ppd,
struct qib_verbs_txreq *tx)
{
- atomic_inc(&tx->qp->s_dma_busy);
+ struct qib_qp_priv *priv = tx->qp->priv;
+
+ atomic_inc(&priv->s_dma_busy);
/* no sdma descriptors, so no unmap_desc */
tx->txreq.start_idx = 0;
tx->txreq.next_descq_idx = 0;
@@ -531,18 +533,19 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd,
* 3) The SGE addresses are suitable for passing to dma_map_single().
*/
int qib_sdma_verbs_send(struct qib_pportdata *ppd,
- struct qib_sge_state *ss, u32 dwords,
+ struct rvt_sge_state *ss, u32 dwords,
struct qib_verbs_txreq *tx)
{
unsigned long flags;
- struct qib_sge *sge;
- struct qib_qp *qp;
+ struct rvt_sge *sge;
+ struct rvt_qp *qp;
int ret = 0;
u16 tail;
__le64 *descqp;
u64 sdmadesc[2];
u32 dwoffset;
dma_addr_t addr;
+ struct qib_qp_priv *priv;
spin_lock_irqsave(&ppd->sdma_lock, flags);
@@ -621,7 +624,7 @@ retry:
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -644,8 +647,8 @@ retry:
descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
-
- atomic_inc(&tx->qp->s_dma_busy);
+ priv = tx->qp->priv;
+ atomic_inc(&priv->s_dma_busy);
tx->txreq.next_descq_idx = tail;
ppd->dd->f_sdma_update_tail(ppd, tail);
ppd->sdma_descq_added += tx->txreq.sg_count;
@@ -663,13 +666,14 @@ unmap:
unmap_desc(ppd, tail);
}
qp = tx->qp;
+ priv = qp->priv;
qib_put_txreq(tx);
spin_lock(&qp->r_lock);
spin_lock(&qp->s_lock);
if (qp->ibqp.qp_type == IB_QPT_RC) {
/* XXX what about error sending RDMA read responses? */
- if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
- qib_error_qp(qp, IB_WC_GENERAL_ERR);
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
+ rvt_error_qp(qp, IB_WC_GENERAL_ERR);
} else if (qp->s_wqe)
qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock(&qp->s_lock);
@@ -679,8 +683,9 @@ unmap:
busy:
qp = tx->qp;
+ priv = qp->priv;
spin_lock(&qp->s_lock);
- if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct qib_ibdev *dev;
/*
@@ -690,19 +695,19 @@ busy:
*/
tx->ss = ss;
tx->dwords = dwords;
- qp->s_tx = tx;
+ priv->s_tx = tx;
dev = &ppd->dd->verbs_dev;
- spin_lock(&dev->pending_lock);
- if (list_empty(&qp->iowait)) {
+ spin_lock(&dev->rdi.pending_lock);
+ if (list_empty(&priv->iowait)) {
struct qib_ibport *ibp;
ibp = &ppd->ibport_data;
- ibp->n_dmawait++;
- qp->s_flags |= QIB_S_WAIT_DMA_DESC;
- list_add_tail(&qp->iowait, &dev->dmawait);
+ ibp->rvp.n_dmawait++;
+ qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+ list_add_tail(&priv->iowait, &dev->dmawait);
}
- spin_unlock(&dev->pending_lock);
- qp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock(&dev->rdi.pending_lock);
+ qp->s_flags &= ~RVT_S_BUSY;
spin_unlock(&qp->s_lock);
ret = -EBUSY;
} else {
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
deleted file mode 100644
index d6235931a1ba..000000000000
--- a/drivers/infiniband/hw/qib/qib_srq.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
-{
- struct qib_srq *srq = to_isrq(ibsrq);
- struct qib_rwq *wq;
- unsigned long flags;
- int ret;
-
- for (; wr; wr = wr->next) {
- struct qib_rwqe *wqe;
- u32 next;
- int i;
-
- if ((unsigned) wr->num_sge > srq->rq.max_sge) {
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
-
- spin_lock_irqsave(&srq->rq.lock, flags);
- wq = srq->rq.wq;
- next = wq->head + 1;
- if (next >= srq->rq.size)
- next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- *bad_wr = wr;
- ret = -ENOMEM;
- goto bail;
- }
-
- wqe = get_rwqe_ptr(&srq->rq, wq->head);
- wqe->wr_id = wr->wr_id;
- wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
- /* Make sure queue entry is written before the head index. */
- smp_wmb();
- wq->head = next;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- }
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * qib_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libibverbs when creating a user SRQ
- */
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata)
-{
- struct qib_ibdev *dev = to_idev(ibpd->device);
- struct qib_srq *srq;
- u32 sz;
- struct ib_srq *ret;
-
- if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
- ret = ERR_PTR(-ENOSYS);
- goto done;
- }
-
- if (srq_init_attr->attr.max_sge == 0 ||
- srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
- srq_init_attr->attr.max_wr == 0 ||
- srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
- ret = ERR_PTR(-EINVAL);
- goto done;
- }
-
- srq = kmalloc(sizeof(*srq), GFP_KERNEL);
- if (!srq) {
- ret = ERR_PTR(-ENOMEM);
- goto done;
- }
-
- /*
- * Need to use vmalloc() if we want to support large #s of entries.
- */
- srq->rq.size = srq_init_attr->attr.max_wr + 1;
- srq->rq.max_sge = srq_init_attr->attr.max_sge;
- sz = sizeof(struct ib_sge) * srq->rq.max_sge +
- sizeof(struct qib_rwqe);
- srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
- if (!srq->rq.wq) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_srq;
- }
-
- /*
- * Return the address of the RWQ as the offset to mmap.
- * See qib_mmap() for details.
- */
- if (udata && udata->outlen >= sizeof(__u64)) {
- int err;
- u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
-
- srq->ip =
- qib_create_mmap_info(dev, s, ibpd->uobject->context,
- srq->rq.wq);
- if (!srq->ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_wq;
- }
-
- err = ib_copy_to_udata(udata, &srq->ip->offset,
- sizeof(srq->ip->offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_ip;
- }
- } else
- srq->ip = NULL;
-
- /*
- * ib_create_srq() will initialize srq->ibsrq.
- */
- spin_lock_init(&srq->rq.lock);
- srq->rq.wq->head = 0;
- srq->rq.wq->tail = 0;
- srq->limit = srq_init_attr->attr.srq_limit;
-
- spin_lock(&dev->n_srqs_lock);
- if (dev->n_srqs_allocated == ib_qib_max_srqs) {
- spin_unlock(&dev->n_srqs_lock);
- ret = ERR_PTR(-ENOMEM);
- goto bail_ip;
- }
-
- dev->n_srqs_allocated++;
- spin_unlock(&dev->n_srqs_lock);
-
- if (srq->ip) {
- spin_lock_irq(&dev->pending_lock);
- list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
- }
-
- ret = &srq->ibsrq;
- goto done;
-
-bail_ip:
- kfree(srq->ip);
-bail_wq:
- vfree(srq->rq.wq);
-bail_srq:
- kfree(srq);
-done:
- return ret;
-}
-
-/**
- * qib_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for libibverbs.so
- */
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask,
- struct ib_udata *udata)
-{
- struct qib_srq *srq = to_isrq(ibsrq);
- struct qib_rwq *wq;
- int ret = 0;
-
- if (attr_mask & IB_SRQ_MAX_WR) {
- struct qib_rwq *owq;
- struct qib_rwqe *p;
- u32 sz, size, n, head, tail;
-
- /* Check that the requested sizes are below the limits. */
- if ((attr->max_wr > ib_qib_max_srq_wrs) ||
- ((attr_mask & IB_SRQ_LIMIT) ?
- attr->srq_limit : srq->limit) > attr->max_wr) {
- ret = -EINVAL;
- goto bail;
- }
-
- sz = sizeof(struct qib_rwqe) +
- srq->rq.max_sge * sizeof(struct ib_sge);
- size = attr->max_wr + 1;
- wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
- if (!wq) {
- ret = -ENOMEM;
- goto bail;
- }
-
- /* Check that we can write the offset to mmap. */
- if (udata && udata->inlen >= sizeof(__u64)) {
- __u64 offset_addr;
- __u64 offset = 0;
-
- ret = ib_copy_from_udata(&offset_addr, udata,
- sizeof(offset_addr));
- if (ret)
- goto bail_free;
- udata->outbuf =
- (void __user *) (unsigned long) offset_addr;
- ret = ib_copy_to_udata(udata, &offset,
- sizeof(offset));
- if (ret)
- goto bail_free;
- }
-
- spin_lock_irq(&srq->rq.lock);
- /*
- * validate head and tail pointer values and compute
- * the number of remaining WQEs.
- */
- owq = srq->rq.wq;
- head = owq->head;
- tail = owq->tail;
- if (head >= srq->rq.size || tail >= srq->rq.size) {
- ret = -EINVAL;
- goto bail_unlock;
- }
- n = head;
- if (n < tail)
- n += srq->rq.size - tail;
- else
- n -= tail;
- if (size <= n) {
- ret = -EINVAL;
- goto bail_unlock;
- }
- n = 0;
- p = wq->wq;
- while (tail != head) {
- struct qib_rwqe *wqe;
- int i;
-
- wqe = get_rwqe_ptr(&srq->rq, tail);
- p->wr_id = wqe->wr_id;
- p->num_sge = wqe->num_sge;
- for (i = 0; i < wqe->num_sge; i++)
- p->sg_list[i] = wqe->sg_list[i];
- n++;
- p = (struct qib_rwqe *)((char *) p + sz);
- if (++tail >= srq->rq.size)
- tail = 0;
- }
- srq->rq.wq = wq;
- srq->rq.size = size;
- wq->head = n;
- wq->tail = 0;
- if (attr_mask & IB_SRQ_LIMIT)
- srq->limit = attr->srq_limit;
- spin_unlock_irq(&srq->rq.lock);
-
- vfree(owq);
-
- if (srq->ip) {
- struct qib_mmap_info *ip = srq->ip;
- struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
- u32 s = sizeof(struct qib_rwq) + size * sz;
-
- qib_update_mmap_info(dev, ip, s, wq);
-
- /*
- * Return the offset to mmap.
- * See qib_mmap() for details.
- */
- if (udata && udata->inlen >= sizeof(__u64)) {
- ret = ib_copy_to_udata(udata, &ip->offset,
- sizeof(ip->offset));
- if (ret)
- goto bail;
- }
-
- /*
- * Put user mapping info onto the pending list
- * unless it already is on the list.
- */
- spin_lock_irq(&dev->pending_lock);
- if (list_empty(&ip->pending_mmaps))
- list_add(&ip->pending_mmaps,
- &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
- }
- } else if (attr_mask & IB_SRQ_LIMIT) {
- spin_lock_irq(&srq->rq.lock);
- if (attr->srq_limit >= srq->rq.size)
- ret = -EINVAL;
- else
- srq->limit = attr->srq_limit;
- spin_unlock_irq(&srq->rq.lock);
- }
- goto bail;
-
-bail_unlock:
- spin_unlock_irq(&srq->rq.lock);
-bail_free:
- vfree(wq);
-bail:
- return ret;
-}
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
- struct qib_srq *srq = to_isrq(ibsrq);
-
- attr->max_wr = srq->rq.size - 1;
- attr->max_sge = srq->rq.max_sge;
- attr->srq_limit = srq->limit;
- return 0;
-}
-
-/**
- * qib_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int qib_destroy_srq(struct ib_srq *ibsrq)
-{
- struct qib_srq *srq = to_isrq(ibsrq);
- struct qib_ibdev *dev = to_idev(ibsrq->device);
-
- spin_lock(&dev->n_srqs_lock);
- dev->n_srqs_allocated--;
- spin_unlock(&dev->n_srqs_lock);
- if (srq->ip)
- kref_put(&srq->ip->ref, qib_release_mmap_info);
- else
- vfree(srq->rq.wq);
- kfree(srq);
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 81f56cdff2bc..fe4cf5e4acec 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -406,7 +406,13 @@ static struct kobj_type qib_sl2vl_ktype = {
#define QIB_DIAGC_ATTR(N) \
static struct qib_diagc_attr qib_diagc_attr_##N = { \
.attr = { .name = __stringify(N), .mode = 0664 }, \
- .counter = offsetof(struct qib_ibport, n_##N) \
+ .counter = offsetof(struct qib_ibport, rvp.n_##N) \
+ }
+
+#define QIB_DIAGC_ATTR_PER_CPU(N) \
+ static struct qib_diagc_attr qib_diagc_attr_##N = { \
+ .attr = { .name = __stringify(N), .mode = 0664 }, \
+ .counter = offsetof(struct qib_ibport, rvp.z_##N) \
}
struct qib_diagc_attr {
@@ -414,10 +420,11 @@ struct qib_diagc_attr {
size_t counter;
};
+QIB_DIAGC_ATTR_PER_CPU(rc_acks);
+QIB_DIAGC_ATTR_PER_CPU(rc_qacks);
+QIB_DIAGC_ATTR_PER_CPU(rc_delayed_comp);
+
QIB_DIAGC_ATTR(rc_resends);
-QIB_DIAGC_ATTR(rc_acks);
-QIB_DIAGC_ATTR(rc_qacks);
-QIB_DIAGC_ATTR(rc_delayed_comp);
QIB_DIAGC_ATTR(seq_naks);
QIB_DIAGC_ATTR(rdma_seq);
QIB_DIAGC_ATTR(rnr_naks);
@@ -449,6 +456,35 @@ static struct attribute *diagc_default_attributes[] = {
NULL
};
+static u64 get_all_cpu_total(u64 __percpu *cntr)
+{
+ int cpu;
+ u64 counter = 0;
+
+ for_each_possible_cpu(cpu)
+ counter += *per_cpu_ptr(cntr, cpu);
+ return counter;
+}
+
+#define def_write_per_cpu(cntr) \
+static void write_per_cpu_##cntr(struct qib_pportdata *ppd, u32 data) \
+{ \
+ struct qib_devdata *dd = ppd->dd; \
+ struct qib_ibport *qibp = &ppd->ibport_data; \
+ /* A write can only zero the counter */ \
+ if (data == 0) \
+ qibp->rvp.z_##cntr = get_all_cpu_total(qibp->rvp.cntr); \
+ else \
+ qib_dev_err(dd, "Per CPU cntrs can only be zeroed"); \
+}
+
+def_write_per_cpu(rc_acks)
+def_write_per_cpu(rc_qacks)
+def_write_per_cpu(rc_delayed_comp)
+
+#define READ_PER_CPU_CNTR(cntr) (get_all_cpu_total(qibp->rvp.cntr) - \
+ qibp->rvp.z_##cntr)
+
static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
@@ -458,7 +494,16 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
container_of(kobj, struct qib_pportdata, diagc_kobj);
struct qib_ibport *qibp = &ppd->ibport_data;
- return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+ if (!strncmp(dattr->attr.name, "rc_acks", 7))
+ return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks));
+ else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+ return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks));
+ else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+ return sprintf(buf, "%llu\n",
+ READ_PER_CPU_CNTR(rc_delayed_comp));
+ else
+ return sprintf(buf, "%u\n",
+ *(u32 *)((char *)qibp + dattr->counter));
}
static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
@@ -475,7 +520,15 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
ret = kstrtou32(buf, 0, &val);
if (ret)
return ret;
- *(u32 *)((char *) qibp + dattr->counter) = val;
+
+ if (!strncmp(dattr->attr.name, "rc_acks", 7))
+ write_per_cpu_rc_acks(ppd, val);
+ else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+ write_per_cpu_rc_qacks(ppd, val);
+ else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+ write_per_cpu_rc_delayed_comp(ppd, val);
+ else
+ *(u32 *)((char *)qibp + dattr->counter) = val;
return size;
}
@@ -502,7 +555,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
@@ -511,7 +564,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
int ret;
@@ -533,7 +586,7 @@ static ssize_t show_boardversion(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
@@ -545,7 +598,7 @@ static ssize_t show_localbus_info(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
@@ -557,7 +610,7 @@ static ssize_t show_nctxts(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
/* Return the number of user ports (contexts) available. */
@@ -572,7 +625,7 @@ static ssize_t show_nfreectxts(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
@@ -583,7 +636,7 @@ static ssize_t show_serial(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
buf[sizeof(dd->serial)] = '\0';
@@ -597,7 +650,7 @@ static ssize_t store_chip_reset(struct device *device,
size_t count)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
int ret;
@@ -618,7 +671,7 @@ static ssize_t show_tempsense(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
- container_of(device, struct qib_ibdev, ibdev.dev);
+ container_of(device, struct qib_ibdev, rdi.ibdev.dev);
struct qib_devdata *dd = dd_from_dev(dev);
int ret;
int idx;
@@ -778,7 +831,7 @@ bail:
*/
int qib_verbs_register_sysfs(struct qib_devdata *dd)
{
- struct ib_device *dev = &dd->verbs_dev.ibdev;
+ struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
int i, ret;
for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 06a564589c35..7bdbc79ceaa3 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -41,61 +41,62 @@
* qib_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
*
+ * Assumes the s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_uc_req(struct qib_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
- struct qib_swqe *wqe;
- unsigned long flags;
+ struct rvt_swqe *wqe;
u32 hwords;
u32 bth0;
u32 len;
u32 pmtu = qp->pmtu;
int ret = 0;
- spin_lock_irqsave(&qp->s_lock, flags);
-
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
- if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
- if (atomic_read(&qp->s_dma_busy)) {
- qp->s_flags |= QIB_S_WAIT_DMA;
+ if (atomic_read(&priv->s_dma_busy)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
- ohdr = &qp->s_hdr->u.oth;
+ ohdr = &priv->s_hdr->u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr->u.l.oth;
+ ohdr = &priv->s_hdr->u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 0;
/* Get the next send request. */
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
- if (!(ib_qib_state_ops[qp->state] &
- QIB_PROCESS_NEXT_SEND_OK))
+ if (!(ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
- if (qp->s_cur == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail;
/*
* Start a new request.
*/
- wqe->psn = qp->s_next_psn;
- qp->s_psn = qp->s_next_psn;
+ qp->s_psn = wqe->psn;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -214,15 +215,11 @@ int qib_make_uc_req(struct qib_qp *qp)
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
- qp->s_next_psn++ & QIB_PSN_MASK);
+ qp->s_psn++ & QIB_PSN_MASK);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
- qp->s_flags &= ~QIB_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->s_flags &= ~RVT_S_BUSY;
return ret;
}
@@ -240,7 +237,7 @@ unlock:
* Called at interrupt level.
*/
void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_other_headers *ohdr;
u32 opcode;
@@ -278,10 +275,10 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
inv:
if (qp->r_state == OP(SEND_FIRST) ||
qp->r_state == OP(SEND_MIDDLE)) {
- set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+ set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
} else
- qib_put_ss(&qp->r_sge);
+ rvt_put_ss(&qp->r_sge);
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
@@ -328,8 +325,8 @@ inv:
goto inv;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
- qp->r_flags |= QIB_R_COMM_EST;
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+ qp->r_flags |= RVT_R_COMM_EST;
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -346,7 +343,7 @@ inv:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
- if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+ if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
qp->r_sge = qp->s_rdma_read_sge;
else {
ret = qib_get_rwqe(qp, 0);
@@ -400,7 +397,7 @@ send_last:
goto rewind;
wc.opcode = IB_WC_RECV;
qib_copy_sge(&qp->r_sge, data, tlen, 0);
- qib_put_ss(&qp->s_rdma_read_sge);
+ rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -414,7 +411,7 @@ last_imm:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
break;
@@ -438,7 +435,7 @@ rdma_first:
int ok;
/* Check rkey */
- ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+ ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto drop;
@@ -483,8 +480,8 @@ rdma_last_imm:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- qib_put_ss(&qp->s_rdma_read_sge);
+ if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+ rvt_put_ss(&qp->s_rdma_read_sge);
else {
ret = qib_get_rwqe(qp, 1);
if (ret < 0)
@@ -495,7 +492,7 @@ rdma_last_imm:
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- qib_put_ss(&qp->r_sge);
+ rvt_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
@@ -511,7 +508,7 @@ rdma_last:
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- qib_put_ss(&qp->r_sge);
+ rvt_put_ss(&qp->r_sge);
break;
default:
@@ -523,10 +520,10 @@ rdma_last:
return;
rewind:
- set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+ set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
drop:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
return;
op_err:
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 59193f67ea78..d9502137de62 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -32,6 +32,7 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
#include "qib.h"
#include "qib_mad.h"
@@ -46,22 +47,26 @@
* Note that the receive interrupt handler may be calling qib_ud_rcv()
* while this is being called.
*/
-static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
{
struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct qib_pportdata *ppd;
- struct qib_qp *qp;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = ppd->dd;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct rvt_qp *qp;
struct ib_ah_attr *ah_attr;
unsigned long flags;
- struct qib_sge_state ssge;
- struct qib_sge *sge;
+ struct rvt_sge_state ssge;
+ struct rvt_sge *sge;
struct ib_wc wc;
u32 length;
enum ib_qp_type sqptype, dqptype;
- qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
if (!qp) {
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
+ rcu_read_unlock();
return;
}
@@ -71,12 +76,12 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
IB_QPT_UD : qp->ibqp.qp_type;
if (dqptype != sqptype ||
- !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
- ibp->n_pkt_drops++;
+ !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ ibp->rvp.n_pkt_drops++;
goto drop;
}
- ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+ ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -140,8 +145,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
/*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_flags & QIB_R_REUSE_SGE)
- qp->r_flags &= ~QIB_R_REUSE_SGE;
+ if (qp->r_flags & RVT_R_REUSE_SGE)
+ qp->r_flags &= ~RVT_R_REUSE_SGE;
else {
int ret;
@@ -152,14 +157,14 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
}
if (!ret) {
if (qp->ibqp.qp_num == 0)
- ibp->n_vl15_dropped++;
+ ibp->rvp.n_vl15_dropped++;
goto bail_unlock;
}
}
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_flags |= QIB_R_REUSE_SGE;
- ibp->n_pkt_drops++;
+ qp->r_flags |= RVT_R_REUSE_SGE;
+ ibp->rvp.n_pkt_drops++;
goto bail_unlock;
}
@@ -189,7 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
if (--ssge.num_sge)
*sge = *ssge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -201,8 +206,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
}
length -= len;
}
- qib_put_ss(&qp->r_sge);
- if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ rvt_put_ss(&qp->r_sge);
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
goto bail_unlock;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -216,30 +221,31 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
swqe->wr.send_flags & IB_SEND_SOLICITED);
- ibp->n_loop_pkts++;
+ ibp->rvp.n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
drop:
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rcu_read_unlock();
}
/**
* qib_make_ud_req - construct a UD request packet
* @qp: the QP
*
+ * Assumes the s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_ud_req(struct qib_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
- struct qib_swqe *wqe;
- unsigned long flags;
+ struct rvt_swqe *wqe;
u32 nwords;
u32 extra_bytes;
u32 bth0;
@@ -248,28 +254,29 @@ int qib_make_ud_req(struct qib_qp *qp)
int ret = 0;
int next_cur;
- spin_lock_irqsave(&qp->s_lock, flags);
-
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
- if (atomic_read(&qp->s_dma_busy)) {
- qp->s_flags |= QIB_S_WAIT_DMA;
+ if (atomic_read(&priv->s_dma_busy)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
- if (qp->s_cur == qp->s_head)
+ /* see post_one_send() */
+ smp_read_barrier_depends();
+ if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail;
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
next_cur = qp->s_cur + 1;
if (next_cur >= qp->s_size)
next_cur = 0;
@@ -277,9 +284,9 @@ int qib_make_ud_req(struct qib_qp *qp)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
- if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+ ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE))
this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
@@ -287,6 +294,7 @@ int qib_make_ud_req(struct qib_qp *qp)
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
+ unsigned long flags;
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
@@ -294,11 +302,12 @@ int qib_make_ud_req(struct qib_qp *qp)
* XXX Instead of waiting, we could queue a
* zero length descriptor so we get a callback.
*/
- if (atomic_read(&qp->s_dma_busy)) {
- qp->s_flags |= QIB_S_WAIT_DMA;
+ if (atomic_read(&priv->s_dma_busy)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
qp->s_cur = next_cur;
+ local_irq_save(flags);
spin_unlock_irqrestore(&qp->s_lock, flags);
qib_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -324,11 +333,11 @@ int qib_make_ud_req(struct qib_qp *qp)
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
- qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+ qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
&ah_attr->grh,
qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
- ohdr = &qp->s_hdr->u.l.oth;
+ ohdr = &priv->s_hdr->u.l.oth;
/*
* Don't worry about sending to locally attached multicast
* QPs. It is unspecified by the spec. what happens.
@@ -336,7 +345,7 @@ int qib_make_ud_req(struct qib_qp *qp)
} else {
/* Header size in 32-bit words. */
lrh0 = QIB_LRH_BTH;
- ohdr = &qp->s_hdr->u.oth;
+ ohdr = &priv->s_hdr->u.oth;
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
@@ -349,15 +358,16 @@ int qib_make_ud_req(struct qib_qp *qp)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
else
lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
- qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
- qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+ priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ priv->s_hdr->lrh[2] =
+ cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
lid = ppd->lid;
if (lid) {
lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
- qp->s_hdr->lrh[3] = cpu_to_be16(lid);
+ priv->s_hdr->lrh[3] = cpu_to_be16(lid);
} else
- qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
+ priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
@@ -368,11 +378,11 @@ int qib_make_ud_req(struct qib_qp *qp)
/*
* Use the multicast QP if the destination LID is a multicast LID.
*/
- ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
- ah_attr->dlid != QIB_PERMISSIVE_LID ?
+ ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+ ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
cpu_to_be32(QIB_MULTICAST_QPN) :
cpu_to_be32(wqe->ud_wr.remote_qpn);
- ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+ ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
@@ -382,13 +392,9 @@ int qib_make_ud_req(struct qib_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
- qp->s_flags &= ~QIB_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->s_flags &= ~RVT_S_BUSY;
return ret;
}
@@ -426,7 +432,7 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
* Called at interrupt level.
*/
void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_other_headers *ohdr;
int opcode;
@@ -446,7 +452,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
}
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
- src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+ src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
/*
* Get the number of bytes the message was padded by
@@ -531,8 +537,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
/*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_flags & QIB_R_REUSE_SGE)
- qp->r_flags &= ~QIB_R_REUSE_SGE;
+ if (qp->r_flags & RVT_R_REUSE_SGE)
+ qp->r_flags &= ~RVT_R_REUSE_SGE;
else {
int ret;
@@ -543,13 +549,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
}
if (!ret) {
if (qp->ibqp.qp_num == 0)
- ibp->n_vl15_dropped++;
+ ibp->rvp.n_vl15_dropped++;
return;
}
}
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_flags |= QIB_R_REUSE_SGE;
+ qp->r_flags |= RVT_R_REUSE_SGE;
goto drop;
}
if (has_grh) {
@@ -559,8 +565,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
} else
qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
- qib_put_ss(&qp->r_sge);
- if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ rvt_put_ss(&qp->r_sge);
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -576,15 +582,15 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
- wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+ wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
return;
drop:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index baf1e42b6896..cbf6200e6afc 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -41,6 +41,7 @@
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
+#include <rdma/rdma_vt.h>
#include "qib.h"
#include "qib_common.h"
@@ -49,8 +50,8 @@ static unsigned int ib_qib_qp_table_size = 256;
module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");
-unsigned int ib_qib_lkey_table_size = 16;
-module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+static unsigned int qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, qib_lkey_table_size, uint,
S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
"LKEY table size in bits (2^n, 1 <= n <= 23)");
@@ -113,36 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; qib_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = 0,
- [IB_QPS_INIT] = QIB_POST_RECV_OK,
- [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
- [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
- QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
- QIB_PROCESS_NEXT_SEND_OK,
- [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
- QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
- [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
- QIB_POST_SEND_OK | QIB_FLUSH_SEND,
- [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
- QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-};
-
-struct qib_ucontext {
- struct ib_ucontext ibucontext;
-};
-
-static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
- *ibucontext)
-{
- return container_of(ibucontext, struct qib_ucontext, ibucontext);
-}
-
-/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
const enum ib_wc_opcode ib_qib_wc_opcode[] = {
@@ -166,9 +137,9 @@ __be64 ib_qib_sys_image_guid;
* @data: the data to copy
* @length: the length of the data
*/
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
{
- struct qib_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
while (length) {
u32 len = sge->length;
@@ -184,11 +155,11 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -208,9 +179,9 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
* @ss: the SGE state
* @length: the number of bytes to skip
*/
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
{
- struct qib_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
while (length) {
u32 len = sge->length;
@@ -225,11 +196,11 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- qib_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -248,10 +219,10 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
* Don't modify the qib_sge_state to get the count.
* Return zero if any of the segments is not aligned.
*/
-static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
{
- struct qib_sge *sg_list = ss->sg_list;
- struct qib_sge sge = ss->sge;
+ struct rvt_sge *sg_list = ss->sg_list;
+ struct rvt_sge sge = ss->sge;
u8 num_sge = ss->num_sge;
u32 ndesc = 1; /* count the header */
@@ -276,7 +247,7 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
if (--num_sge)
sge = *sg_list++;
} else if (sge.length == 0 && sge.mr->lkey) {
- if (++sge.n >= QIB_SEGSZ) {
+ if (++sge.n >= RVT_SEGSZ) {
if (++sge.m >= sge.mr->mapsz)
break;
sge.n = 0;
@@ -294,9 +265,9 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
/*
* Copy from the SGEs to the data buffer.
*/
-static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
{
- struct qib_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
while (length) {
u32 len = sge->length;
@@ -314,7 +285,7 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -330,242 +301,6 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
}
/**
- * qib_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
- int *scheduled)
-{
- struct qib_swqe *wqe;
- u32 next;
- int i;
- int j;
- int acc;
- int ret;
- unsigned long flags;
- struct qib_lkey_table *rkt;
- struct qib_pd *pd;
- int avoid_schedule = 0;
-
- spin_lock_irqsave(&qp->s_lock, flags);
-
- /* Check that state is OK to post send. */
- if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
- goto bail_inval;
-
- /* IB spec says that num_sge == 0 is OK. */
- if (wr->num_sge > qp->s_max_sge)
- goto bail_inval;
-
- /*
- * Don't allow RDMA reads or atomic operations on UC or
- * undefined operations.
- * Make sure buffer is large enough to hold the result for atomics.
- */
- if (wr->opcode == IB_WR_REG_MR) {
- if (qib_reg_mr(qp, reg_wr(wr)))
- goto bail_inval;
- } else if (qp->ibqp.qp_type == IB_QPT_UC) {
- if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
- goto bail_inval;
- } else if (qp->ibqp.qp_type != IB_QPT_RC) {
- /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
- if (wr->opcode != IB_WR_SEND &&
- wr->opcode != IB_WR_SEND_WITH_IMM)
- goto bail_inval;
- /* Check UD destination address PD */
- if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
- goto bail_inval;
- } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
- goto bail_inval;
- else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
- (wr->num_sge == 0 ||
- wr->sg_list[0].length < sizeof(u64) ||
- wr->sg_list[0].addr & (sizeof(u64) - 1)))
- goto bail_inval;
- else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
- goto bail_inval;
-
- next = qp->s_head + 1;
- if (next >= qp->s_size)
- next = 0;
- if (next == qp->s_last) {
- ret = -ENOMEM;
- goto bail;
- }
-
- rkt = &to_idev(qp->ibqp.device)->lk_table;
- pd = to_ipd(qp->ibqp.pd);
- wqe = get_swqe_ptr(qp, qp->s_head);
-
- if (qp->ibqp.qp_type != IB_QPT_UC &&
- qp->ibqp.qp_type != IB_QPT_RC)
- memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
- else if (wr->opcode == IB_WR_REG_MR)
- memcpy(&wqe->reg_wr, reg_wr(wr),
- sizeof(wqe->reg_wr));
- else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE ||
- wr->opcode == IB_WR_RDMA_READ)
- memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
- else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
- else
- memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
- wqe->length = 0;
- j = 0;
- if (wr->num_sge) {
- acc = wr->opcode >= IB_WR_RDMA_READ ?
- IB_ACCESS_LOCAL_WRITE : 0;
- for (i = 0; i < wr->num_sge; i++) {
- u32 length = wr->sg_list[i].length;
- int ok;
-
- if (length == 0)
- continue;
- ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
- &wr->sg_list[i], acc);
- if (!ok)
- goto bail_inval_free;
- wqe->length += length;
- j++;
- }
- wqe->wr.num_sge = j;
- }
- if (qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_RC) {
- if (wqe->length > 0x80000000U)
- goto bail_inval_free;
- if (wqe->length <= qp->pmtu)
- avoid_schedule = 1;
- } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
- qp->port_num - 1)->ibmtu) {
- goto bail_inval_free;
- } else {
- atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
- avoid_schedule = 1;
- }
- wqe->ssn = qp->s_ssn++;
- qp->s_head = next;
-
- ret = 0;
- goto bail;
-
-bail_inval_free:
- while (j) {
- struct qib_sge *sge = &wqe->sg_list[--j];
-
- qib_put_mr(sge->mr);
- }
-bail_inval:
- ret = -EINVAL;
-bail:
- if (!ret && !wr->next && !avoid_schedule &&
- !qib_sdma_empty(
- dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
- qib_schedule_send(qp);
- *scheduled = 1;
- }
- spin_unlock_irqrestore(&qp->s_lock, flags);
- return ret;
-}
-
-/**
- * qib_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
-{
- struct qib_qp *qp = to_iqp(ibqp);
- int err = 0;
- int scheduled = 0;
-
- for (; wr; wr = wr->next) {
- err = qib_post_one_send(qp, wr, &scheduled);
- if (err) {
- *bad_wr = wr;
- goto bail;
- }
- }
-
- /* Try to do the send work in the caller's context. */
- if (!scheduled)
- qib_do_send(&qp->s_work);
-
-bail:
- return err;
-}
-
-/**
- * qib_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
-{
- struct qib_qp *qp = to_iqp(ibqp);
- struct qib_rwq *wq = qp->r_rq.wq;
- unsigned long flags;
- int ret;
-
- /* Check that state is OK to post receive. */
- if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
-
- for (; wr; wr = wr->next) {
- struct qib_rwqe *wqe;
- u32 next;
- int i;
-
- if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
-
- spin_lock_irqsave(&qp->r_rq.lock, flags);
- next = wq->head + 1;
- if (next >= qp->r_rq.size)
- next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
- *bad_wr = wr;
- ret = -ENOMEM;
- goto bail;
- }
-
- wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
- wqe->wr_id = wr->wr_id;
- wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
- /* Make sure queue entry is written before the head index. */
- smp_wmb();
- wq->head = next;
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
- }
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
* qib_qp_rcv - processing an incoming packet on a QP
* @rcd: the context pointer
* @hdr: the packet header
@@ -579,15 +314,15 @@ bail:
* Called at interrupt level.
*/
static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
spin_lock(&qp->r_lock);
/* Check for valid receive state. */
- if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
- ibp->n_pkt_drops++;
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ ibp->rvp.n_pkt_drops++;
goto unlock;
}
@@ -632,8 +367,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
struct qib_pportdata *ppd = rcd->ppd;
struct qib_ibport *ibp = &ppd->ibport_data;
struct qib_ib_header *hdr = rhdr;
+ struct qib_devdata *dd = ppd->dd;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
struct qib_other_headers *ohdr;
- struct qib_qp *qp;
+ struct rvt_qp *qp;
u32 qp_num;
int lnh;
u8 opcode;
@@ -645,7 +382,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
/* Check for a valid destination LID (see ch. 7.11.1). */
lid = be16_to_cpu(hdr->lrh[1]);
- if (lid < QIB_MULTICAST_LID_BASE) {
+ if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
lid &= ~((1 << ppd->lmc) - 1);
if (unlikely(lid != ppd->lid))
goto drop;
@@ -674,50 +411,40 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
#endif
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
if (qp_num == QIB_MULTICAST_QPN) {
- struct qib_mcast *mcast;
- struct qib_mcast_qp *p;
+ struct rvt_mcast *mcast;
+ struct rvt_mcast_qp *p;
if (lnh != QIB_LRH_GRH)
goto drop;
- mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
if (mcast == NULL)
goto drop;
this_cpu_inc(ibp->pmastats->n_multicast_rcv);
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
/*
- * Notify qib_multicast_detach() if it is waiting for us
+ * Notify rvt_multicast_detach() if it is waiting for us
* to finish.
*/
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
- if (rcd->lookaside_qp) {
- if (rcd->lookaside_qpn != qp_num) {
- if (atomic_dec_and_test(
- &rcd->lookaside_qp->refcount))
- wake_up(
- &rcd->lookaside_qp->wait);
- rcd->lookaside_qp = NULL;
- }
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!qp) {
+ rcu_read_unlock();
+ goto drop;
}
- if (!rcd->lookaside_qp) {
- qp = qib_lookup_qpn(ibp, qp_num);
- if (!qp)
- goto drop;
- rcd->lookaside_qp = qp;
- rcd->lookaside_qpn = qp_num;
- } else
- qp = rcd->lookaside_qp;
this_cpu_inc(ibp->pmastats->n_unicast_rcv);
qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+ rcu_read_unlock();
}
return;
drop:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
}
/*
@@ -728,23 +455,25 @@ static void mem_timer(unsigned long data)
{
struct qib_ibdev *dev = (struct qib_ibdev *) data;
struct list_head *list = &dev->memwait;
- struct qib_qp *qp = NULL;
+ struct rvt_qp *qp = NULL;
+ struct qib_qp_priv *priv = NULL;
unsigned long flags;
- spin_lock_irqsave(&dev->pending_lock, flags);
+ spin_lock_irqsave(&dev->rdi.pending_lock, flags);
if (!list_empty(list)) {
- qp = list_entry(list->next, struct qib_qp, iowait);
- list_del_init(&qp->iowait);
+ priv = list_entry(list->next, struct qib_qp_priv, iowait);
+ qp = priv->owner;
+ list_del_init(&priv->iowait);
atomic_inc(&qp->refcount);
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
}
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
if (qp) {
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & QIB_S_WAIT_KMEM) {
- qp->s_flags &= ~QIB_S_WAIT_KMEM;
+ if (qp->s_flags & RVT_S_WAIT_KMEM) {
+ qp->s_flags &= ~RVT_S_WAIT_KMEM;
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -753,9 +482,9 @@ static void mem_timer(unsigned long data)
}
}
-static void update_sge(struct qib_sge_state *ss, u32 length)
+static void update_sge(struct rvt_sge_state *ss, u32 length)
{
- struct qib_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
sge->vaddr += length;
sge->length -= length;
@@ -764,7 +493,7 @@ static void update_sge(struct qib_sge_state *ss, u32 length)
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
return;
sge->n = 0;
@@ -810,7 +539,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
}
#endif
-static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
u32 length, unsigned flush_wc)
{
u32 extra = 0;
@@ -947,30 +676,31 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
}
static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
- struct qib_qp *qp)
+ struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_verbs_txreq *tx;
unsigned long flags;
spin_lock_irqsave(&qp->s_lock, flags);
- spin_lock(&dev->pending_lock);
+ spin_lock(&dev->rdi.pending_lock);
if (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
list_del(l);
- spin_unlock(&dev->pending_lock);
+ spin_unlock(&dev->rdi.pending_lock);
spin_unlock_irqrestore(&qp->s_lock, flags);
tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
} else {
- if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
- list_empty(&qp->iowait)) {
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
+ list_empty(&priv->iowait)) {
dev->n_txwait++;
- qp->s_flags |= QIB_S_WAIT_TX;
- list_add_tail(&qp->iowait, &dev->txwait);
+ qp->s_flags |= RVT_S_WAIT_TX;
+ list_add_tail(&priv->iowait, &dev->txwait);
}
- qp->s_flags &= ~QIB_S_BUSY;
- spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~RVT_S_BUSY;
+ spin_unlock(&dev->rdi.pending_lock);
spin_unlock_irqrestore(&qp->s_lock, flags);
tx = ERR_PTR(-EBUSY);
}
@@ -978,22 +708,22 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
}
static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
- struct qib_qp *qp)
+ struct rvt_qp *qp)
{
struct qib_verbs_txreq *tx;
unsigned long flags;
- spin_lock_irqsave(&dev->pending_lock, flags);
+ spin_lock_irqsave(&dev->rdi.pending_lock, flags);
/* assume the list non empty */
if (likely(!list_empty(&dev->txreq_free))) {
struct list_head *l = dev->txreq_free.next;
list_del(l);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
} else {
/* call slow path to get the extra lock */
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
tx = __get_txreq(dev, qp);
}
return tx;
@@ -1002,16 +732,15 @@ static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
void qib_put_txreq(struct qib_verbs_txreq *tx)
{
struct qib_ibdev *dev;
- struct qib_qp *qp;
+ struct rvt_qp *qp;
+ struct qib_qp_priv *priv;
unsigned long flags;
qp = tx->qp;
dev = to_idev(qp->ibqp.device);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
if (tx->mr) {
- qib_put_mr(tx->mr);
+ rvt_put_mr(tx->mr);
tx->mr = NULL;
}
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1022,21 +751,23 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
kfree(tx->align_buf);
}
- spin_lock_irqsave(&dev->pending_lock, flags);
+ spin_lock_irqsave(&dev->rdi.pending_lock, flags);
/* Put struct back on free list */
list_add(&tx->txreq.list, &dev->txreq_free);
if (!list_empty(&dev->txwait)) {
/* Wake up first QP wanting a free struct */
- qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
- list_del_init(&qp->iowait);
+ priv = list_entry(dev->txwait.next, struct qib_qp_priv,
+ iowait);
+ qp = priv->owner;
+ list_del_init(&priv->iowait);
atomic_inc(&qp->refcount);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & QIB_S_WAIT_TX) {
- qp->s_flags &= ~QIB_S_WAIT_TX;
+ if (qp->s_flags & RVT_S_WAIT_TX) {
+ qp->s_flags &= ~RVT_S_WAIT_TX;
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1044,7 +775,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
} else
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
}
/*
@@ -1055,36 +786,39 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
*/
void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
{
- struct qib_qp *qp, *nqp;
- struct qib_qp *qps[20];
+ struct rvt_qp *qp, *nqp;
+ struct qib_qp_priv *qpp, *nqpp;
+ struct rvt_qp *qps[20];
struct qib_ibdev *dev;
unsigned i, n;
n = 0;
dev = &ppd->dd->verbs_dev;
- spin_lock(&dev->pending_lock);
+ spin_lock(&dev->rdi.pending_lock);
/* Search wait list for first QP wanting DMA descriptors. */
- list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+ list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
+ qp = qpp->owner;
+ nqp = nqpp->owner;
if (qp->port_num != ppd->port)
continue;
if (n == ARRAY_SIZE(qps))
break;
- if (qp->s_tx->txreq.sg_count > avail)
+ if (qpp->s_tx->txreq.sg_count > avail)
break;
- avail -= qp->s_tx->txreq.sg_count;
- list_del_init(&qp->iowait);
+ avail -= qpp->s_tx->txreq.sg_count;
+ list_del_init(&qpp->iowait);
atomic_inc(&qp->refcount);
qps[n++] = qp;
}
- spin_unlock(&dev->pending_lock);
+ spin_unlock(&dev->rdi.pending_lock);
for (i = 0; i < n; i++) {
qp = qps[i];
spin_lock(&qp->s_lock);
- if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
- qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+ if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
+ qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
qib_schedule_send(qp);
}
spin_unlock(&qp->s_lock);
@@ -1100,7 +834,8 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
{
struct qib_verbs_txreq *tx =
container_of(cookie, struct qib_verbs_txreq, txreq);
- struct qib_qp *qp = tx->qp;
+ struct rvt_qp *qp = tx->qp;
+ struct qib_qp_priv *priv = qp->priv;
spin_lock(&qp->s_lock);
if (tx->wqe)
@@ -1117,11 +852,11 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
}
qib_rc_send_complete(qp, hdr);
}
- if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ if (atomic_dec_and_test(&priv->s_dma_busy)) {
if (qp->state == IB_QPS_RESET)
- wake_up(&qp->wait_dma);
- else if (qp->s_flags & QIB_S_WAIT_DMA) {
- qp->s_flags &= ~QIB_S_WAIT_DMA;
+ wake_up(&priv->wait_dma);
+ else if (qp->s_flags & RVT_S_WAIT_DMA) {
+ qp->s_flags &= ~RVT_S_WAIT_DMA;
qib_schedule_send(qp);
}
}
@@ -1130,22 +865,23 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
qib_put_txreq(tx);
}
-static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
- spin_lock(&dev->pending_lock);
- if (list_empty(&qp->iowait)) {
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+ spin_lock(&dev->rdi.pending_lock);
+ if (list_empty(&priv->iowait)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
- qp->s_flags |= QIB_S_WAIT_KMEM;
- list_add_tail(&qp->iowait, &dev->memwait);
+ qp->s_flags |= RVT_S_WAIT_KMEM;
+ list_add_tail(&priv->iowait, &dev->memwait);
}
- spin_unlock(&dev->pending_lock);
- qp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock(&dev->rdi.pending_lock);
+ qp->s_flags &= ~RVT_S_BUSY;
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1153,10 +889,11 @@ static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
return ret;
}
-static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
struct qib_devdata *dd = dd_from_dev(dev);
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -1167,9 +904,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
u32 ndesc;
int ret;
- tx = qp->s_tx;
+ tx = priv->s_tx;
if (tx) {
- qp->s_tx = NULL;
+ priv->s_tx = NULL;
/* resend previously constructed packet */
ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
goto bail;
@@ -1182,7 +919,6 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
be16_to_cpu(hdr->lrh[0]) >> 12);
tx->qp = qp;
- atomic_inc(&qp->refcount);
tx->wqe = qp->s_wqe;
tx->mr = qp->s_rdma_mr;
if (qp->s_rdma_mr)
@@ -1245,7 +981,7 @@ err_tx:
qib_put_txreq(tx);
ret = wait_kmem(dev, qp);
unaligned:
- ibp->n_unaligned++;
+ ibp->rvp.n_unaligned++;
bail:
return ret;
bail_tx:
@@ -1257,8 +993,9 @@ bail_tx:
* If we are now in the error state, return zero to flush the
* send work request.
*/
-static int no_bufs_available(struct qib_qp *qp)
+static int no_bufs_available(struct rvt_qp *qp)
{
+ struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
struct qib_devdata *dd;
unsigned long flags;
@@ -1271,25 +1008,25 @@ static int no_bufs_available(struct qib_qp *qp)
* enabling the PIO avail interrupt.
*/
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
- spin_lock(&dev->pending_lock);
- if (list_empty(&qp->iowait)) {
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+ spin_lock(&dev->rdi.pending_lock);
+ if (list_empty(&priv->iowait)) {
dev->n_piowait++;
- qp->s_flags |= QIB_S_WAIT_PIO;
- list_add_tail(&qp->iowait, &dev->piowait);
+ qp->s_flags |= RVT_S_WAIT_PIO;
+ list_add_tail(&priv->iowait, &dev->piowait);
dd = dd_from_dev(dev);
dd->f_wantpiobuf_intr(dd, 1);
}
- spin_unlock(&dev->pending_lock);
- qp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock(&dev->rdi.pending_lock);
+ qp->s_flags &= ~RVT_S_BUSY;
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
-static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+ u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1370,7 +1107,7 @@ done:
}
qib_sendbuf_done(dd, pbufn);
if (qp->s_rdma_mr) {
- qib_put_mr(qp->s_rdma_mr);
+ rvt_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_wqe) {
@@ -1394,10 +1131,10 @@ done:
* @len: the length of the packet in bytes
*
* Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
*/
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
- u32 hdrwords, struct qib_sge_state *ss, u32 len)
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct rvt_sge_state *ss, u32 len)
{
struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
u32 plen;
@@ -1529,10 +1266,11 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
{
struct qib_ibdev *dev = &dd->verbs_dev;
struct list_head *list;
- struct qib_qp *qps[5];
- struct qib_qp *qp;
+ struct rvt_qp *qps[5];
+ struct rvt_qp *qp;
unsigned long flags;
unsigned i, n;
+ struct qib_qp_priv *priv;
list = &dev->piowait;
n = 0;
@@ -1543,25 +1281,26 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
* could end up with QPs on the wait list with the interrupt
* disabled.
*/
- spin_lock_irqsave(&dev->pending_lock, flags);
+ spin_lock_irqsave(&dev->rdi.pending_lock, flags);
while (!list_empty(list)) {
if (n == ARRAY_SIZE(qps))
goto full;
- qp = list_entry(list->next, struct qib_qp, iowait);
- list_del_init(&qp->iowait);
+ priv = list_entry(list->next, struct qib_qp_priv, iowait);
+ qp = priv->owner;
+ list_del_init(&priv->iowait);
atomic_inc(&qp->refcount);
qps[n++] = qp;
}
dd->f_wantpiobuf_intr(dd, 0);
full:
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
for (i = 0; i < n; i++) {
qp = qps[i];
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & QIB_S_WAIT_PIO) {
- qp->s_flags &= ~QIB_S_WAIT_PIO;
+ if (qp->s_flags & RVT_S_WAIT_PIO) {
+ qp->s_flags &= ~RVT_S_WAIT_PIO;
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1572,82 +1311,24 @@ full:
}
}
-static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
- struct ib_udata *uhw)
-{
- struct qib_devdata *dd = dd_from_ibdev(ibdev);
- struct qib_ibdev *dev = to_idev(ibdev);
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
- memset(props, 0, sizeof(*props));
-
- props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
- IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
- IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
- props->page_size_cap = PAGE_SIZE;
- props->vendor_id =
- QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
- props->vendor_part_id = dd->deviceid;
- props->hw_ver = dd->minrev;
- props->sys_image_guid = ib_qib_sys_image_guid;
- props->max_mr_size = ~0ULL;
- props->max_qp = ib_qib_max_qps;
- props->max_qp_wr = ib_qib_max_qp_wrs;
- props->max_sge = ib_qib_max_sges;
- props->max_sge_rd = ib_qib_max_sges;
- props->max_cq = ib_qib_max_cqs;
- props->max_ah = ib_qib_max_ahs;
- props->max_cqe = ib_qib_max_cqes;
- props->max_mr = dev->lk_table.max;
- props->max_fmr = dev->lk_table.max;
- props->max_map_per_fmr = 32767;
- props->max_pd = ib_qib_max_pds;
- props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
- props->max_qp_init_rd_atom = 255;
- /* props->max_res_rd_atom */
- props->max_srq = ib_qib_max_srqs;
- props->max_srq_wr = ib_qib_max_srq_wrs;
- props->max_srq_sge = ib_qib_max_srq_sges;
- /* props->local_ca_ack_delay */
- props->atomic_cap = IB_ATOMIC_GLOB;
- props->max_pkeys = qib_get_npkeys(dd);
- props->max_mcast_grp = ib_qib_max_mcast_grps;
- props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
- props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
- props->max_mcast_grp;
-
- return 0;
-}
-
-static int qib_query_port(struct ib_device *ibdev, u8 port,
+static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
struct ib_port_attr *props)
{
- struct qib_devdata *dd = dd_from_ibdev(ibdev);
- struct qib_ibport *ibp = to_iport(ibdev, port);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = dd_from_dev(ibdev);
+ struct qib_pportdata *ppd = &dd->pport[port_num - 1];
enum ib_mtu mtu;
u16 lid = ppd->lid;
- memset(props, 0, sizeof(*props));
props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = ppd->lmc;
- props->sm_lid = ibp->sm_lid;
- props->sm_sl = ibp->sm_sl;
props->state = dd->f_iblink_state(ppd->lastibcstat);
props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
- props->port_cap_flags = ibp->port_cap_flags;
props->gid_tbl_len = QIB_GUIDS_PER_PORT;
- props->max_msg_sz = 0x80000000;
- props->pkey_tbl_len = qib_get_npkeys(dd);
- props->bad_pkey_cntr = ibp->pkey_violations;
- props->qkey_viol_cntr = ibp->qkey_violations;
props->active_width = ppd->link_width_active;
/* See rate_show() */
props->active_speed = ppd->link_speed_active;
props->max_vl_num = qib_num_vls(ppd->vls_supported);
- props->init_type_reply = 0;
props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
switch (ppd->ibmtu) {
@@ -1670,7 +1351,6 @@ static int qib_query_port(struct ib_device *ibdev, u8 port,
mtu = IB_MTU_2048;
}
props->active_mtu = mtu;
- props->subnet_timeout = ibp->subnet_timeout;
return 0;
}
@@ -1714,185 +1394,70 @@ bail:
return ret;
}
-static int qib_modify_port(struct ib_device *ibdev, u8 port,
- int port_modify_mask, struct ib_port_modify *props)
+static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
{
- struct qib_ibport *ibp = to_iport(ibdev, port);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
- ibp->port_cap_flags |= props->set_port_cap_mask;
- ibp->port_cap_flags &= ~props->clr_port_cap_mask;
- if (props->set_port_cap_mask || props->clr_port_cap_mask)
- qib_cap_mask_chg(ibp);
- if (port_modify_mask & IB_PORT_SHUTDOWN)
- qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
- if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
- ibp->qkey_violations = 0;
- return 0;
-}
-
-static int qib_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
-{
- struct qib_devdata *dd = dd_from_ibdev(ibdev);
- int ret = 0;
-
- if (!port || port > dd->num_pports)
- ret = -EINVAL;
- else {
- struct qib_ibport *ibp = to_iport(ibdev, port);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
- gid->global.subnet_prefix = ibp->gid_prefix;
- if (index == 0)
- gid->global.interface_id = ppd->guid;
- else if (index < QIB_GUIDS_PER_PORT)
- gid->global.interface_id = ibp->guids[index - 1];
- else
- ret = -EINVAL;
- }
-
- return ret;
-}
+ struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+ struct qib_devdata *dd = dd_from_dev(ibdev);
+ struct qib_pportdata *ppd = &dd->pport[port_num - 1];
-static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct qib_ibdev *dev = to_idev(ibdev);
- struct qib_pd *pd;
- struct ib_pd *ret;
+ qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
- /*
- * This is actually totally arbitrary. Some correctness tests
- * assume there's a maximum number of PDs that can be allocated.
- * We don't actually have this limit, but we fail the test if
- * we allow allocations of more than we report for this value.
- */
-
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- spin_lock(&dev->n_pds_lock);
- if (dev->n_pds_allocated == ib_qib_max_pds) {
- spin_unlock(&dev->n_pds_lock);
- kfree(pd);
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- dev->n_pds_allocated++;
- spin_unlock(&dev->n_pds_lock);
-
- /* ib_alloc_pd() will initialize pd->ibpd. */
- pd->user = udata != NULL;
-
- ret = &pd->ibpd;
-
-bail:
- return ret;
+ return 0;
}
-static int qib_dealloc_pd(struct ib_pd *ibpd)
+static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+ int guid_index, __be64 *guid)
{
- struct qib_pd *pd = to_ipd(ibpd);
- struct qib_ibdev *dev = to_idev(ibpd->device);
-
- spin_lock(&dev->n_pds_lock);
- dev->n_pds_allocated--;
- spin_unlock(&dev->n_pds_lock);
+ struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- kfree(pd);
+ if (guid_index == 0)
+ *guid = ppd->guid;
+ else if (guid_index < QIB_GUIDS_PER_PORT)
+ *guid = ibp->guids[guid_index - 1];
+ else
+ return -EINVAL;
return 0;
}
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
- /* A multicast address requires a GRH (see ch. 8.4.1). */
- if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
- ah_attr->dlid != QIB_PERMISSIVE_LID &&
- !(ah_attr->ah_flags & IB_AH_GRH))
- goto bail;
- if ((ah_attr->ah_flags & IB_AH_GRH) &&
- ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
- goto bail;
- if (ah_attr->dlid == 0)
- goto bail;
- if (ah_attr->port_num < 1 ||
- ah_attr->port_num > ibdev->phys_port_cnt)
- goto bail;
- if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
- ib_rate_to_mult(ah_attr->static_rate) < 0)
- goto bail;
if (ah_attr->sl > 15)
- goto bail;
+ return -EINVAL;
+
return 0;
-bail:
- return -EINVAL;
}
-/**
- * qib_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *qib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+static void qib_notify_new_ah(struct ib_device *ibdev,
+ struct ib_ah_attr *ah_attr,
+ struct rvt_ah *ah)
{
- struct qib_ah *ah;
- struct ib_ah *ret;
- struct qib_ibdev *dev = to_idev(pd->device);
- unsigned long flags;
+ struct qib_ibport *ibp;
+ struct qib_pportdata *ppd;
- if (qib_check_ah(pd->device, ah_attr)) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
-
- ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- spin_lock_irqsave(&dev->n_ahs_lock, flags);
- if (dev->n_ahs_allocated == ib_qib_max_ahs) {
- spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
- kfree(ah);
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- dev->n_ahs_allocated++;
- spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
- /* ib_create_ah() will initialize ah->ibah. */
- ah->attr = *ah_attr;
- atomic_set(&ah->refcount, 0);
-
- ret = &ah->ibah;
+ /*
+ * Do not trust reading anything from rvt_ah at this point as it is not
+ * done being setup. We can however modify things which we need to set.
+ */
-bail:
- return ret;
+ ibp = to_iport(ibdev, ah_attr->port_num);
+ ppd = ppd_from_ibp(ibp);
+ ah->vl = ibp->sl_to_vl[ah->attr.sl];
+ ah->log_pmtu = ilog2(ppd->ibmtu);
}
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
{
struct ib_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
- struct qib_qp *qp0;
+ struct rvt_qp *qp0;
memset(&attr, 0, sizeof(attr));
attr.dlid = dlid;
attr.port_num = ppd_from_ibp(ibp)->port;
rcu_read_lock();
- qp0 = rcu_dereference(ibp->qp0);
+ qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
ah = ib_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
@@ -1900,51 +1465,6 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
}
/**
- * qib_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int qib_destroy_ah(struct ib_ah *ibah)
-{
- struct qib_ibdev *dev = to_idev(ibah->device);
- struct qib_ah *ah = to_iah(ibah);
- unsigned long flags;
-
- if (atomic_read(&ah->refcount) != 0)
- return -EBUSY;
-
- spin_lock_irqsave(&dev->n_ahs_lock, flags);
- dev->n_ahs_allocated--;
- spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
- kfree(ah);
-
- return 0;
-}
-
-static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
- struct qib_ah *ah = to_iah(ibah);
-
- if (qib_check_ah(ibah->device, ah_attr))
- return -EINVAL;
-
- ah->attr = *ah_attr;
-
- return 0;
-}
-
-static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
- struct qib_ah *ah = to_iah(ibah);
-
- *ah_attr = ah->attr;
-
- return 0;
-}
-
-/**
* qib_get_npkeys - return the size of the PKEY table for context 0
* @dd: the qlogic_ib device
*/
@@ -1973,75 +1493,27 @@ unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
return ret;
}
-static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- struct qib_devdata *dd = dd_from_ibdev(ibdev);
- int ret;
-
- if (index >= qib_get_npkeys(dd)) {
- ret = -EINVAL;
- goto bail;
- }
-
- *pkey = qib_get_pkey(to_iport(ibdev, port), index);
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * qib_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the QLogic_IB driver
- */
-
-static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
-{
- struct qib_ucontext *context;
- struct ib_ucontext *ret;
-
- context = kmalloc(sizeof(*context), GFP_KERNEL);
- if (!context) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- ret = &context->ibucontext;
-
-bail:
- return ret;
-}
-
-static int qib_dealloc_ucontext(struct ib_ucontext *context)
-{
- kfree(to_iucontext(context));
- return 0;
-}
-
static void init_ibport(struct qib_pportdata *ppd)
{
struct qib_verbs_counters cntrs;
struct qib_ibport *ibp = &ppd->ibport_data;
- spin_lock_init(&ibp->lock);
+ spin_lock_init(&ibp->rvp.lock);
/* Set the prefix to the default value (see ch. 4.1.1) */
- ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
- ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
- ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+ ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+ ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
IB_PORT_OTHER_LOCAL_CHANGES_SUP;
if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
- ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
- ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
- ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
- ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
- ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
- ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+ ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+ ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+ ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+ ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+ ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+ ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
/* Snapshot current HW counters to "clear" them. */
qib_get_counters(ppd, &cntrs);
@@ -2061,26 +1533,55 @@ static void init_ibport(struct qib_pportdata *ppd)
ibp->z_excessive_buffer_overrun_errors =
cntrs.excessive_buffer_overrun_errors;
ibp->z_vl15_dropped = cntrs.vl15_dropped;
- RCU_INIT_POINTER(ibp->qp0, NULL);
- RCU_INIT_POINTER(ibp->qp1, NULL);
+ RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+ RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
-static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
+/**
+ * qib_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void qib_fill_device_attr(struct qib_devdata *dd)
{
- struct ib_port_attr attr;
- int err;
-
- err = qib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- return 0;
+ rdi->dparms.props.max_pd = ib_qib_max_pds;
+ rdi->dparms.props.max_ah = ib_qib_max_ahs;
+ rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+ IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ rdi->dparms.props.page_size_cap = PAGE_SIZE;
+ rdi->dparms.props.vendor_id =
+ QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+ rdi->dparms.props.vendor_part_id = dd->deviceid;
+ rdi->dparms.props.hw_ver = dd->minrev;
+ rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
+ rdi->dparms.props.max_mr_size = ~0ULL;
+ rdi->dparms.props.max_qp = ib_qib_max_qps;
+ rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
+ rdi->dparms.props.max_sge = ib_qib_max_sges;
+ rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
+ rdi->dparms.props.max_cq = ib_qib_max_cqs;
+ rdi->dparms.props.max_cqe = ib_qib_max_cqes;
+ rdi->dparms.props.max_ah = ib_qib_max_ahs;
+ rdi->dparms.props.max_mr = rdi->lkey_table.max;
+ rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+ rdi->dparms.props.max_map_per_fmr = 32767;
+ rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+ rdi->dparms.props.max_qp_init_rd_atom = 255;
+ rdi->dparms.props.max_srq = ib_qib_max_srqs;
+ rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
+ rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
+ rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+ rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
+ rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
+ rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+ rdi->dparms.props.max_total_mcast_qp_attach =
+ rdi->dparms.props.max_mcast_qp_attach *
+ rdi->dparms.props.max_mcast_grp;
}
/**
@@ -2091,68 +1592,20 @@ static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
int qib_register_ib_device(struct qib_devdata *dd)
{
struct qib_ibdev *dev = &dd->verbs_dev;
- struct ib_device *ibdev = &dev->ibdev;
+ struct ib_device *ibdev = &dev->rdi.ibdev;
struct qib_pportdata *ppd = dd->pport;
- unsigned i, lk_tab_size;
+ unsigned i, ctxt;
int ret;
- dev->qp_table_size = ib_qib_qp_table_size;
get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
- dev->qp_table = kmalloc_array(
- dev->qp_table_size,
- sizeof(*dev->qp_table),
- GFP_KERNEL);
- if (!dev->qp_table) {
- ret = -ENOMEM;
- goto err_qpt;
- }
- for (i = 0; i < dev->qp_table_size; i++)
- RCU_INIT_POINTER(dev->qp_table[i], NULL);
-
for (i = 0; i < dd->num_pports; i++)
init_ibport(ppd + i);
/* Only need to initialize non-zero fields. */
- spin_lock_init(&dev->qpt_lock);
- spin_lock_init(&dev->n_pds_lock);
- spin_lock_init(&dev->n_ahs_lock);
- spin_lock_init(&dev->n_cqs_lock);
- spin_lock_init(&dev->n_qps_lock);
- spin_lock_init(&dev->n_srqs_lock);
- spin_lock_init(&dev->n_mcast_grps_lock);
- init_timer(&dev->mem_timer);
- dev->mem_timer.function = mem_timer;
- dev->mem_timer.data = (unsigned long) dev;
-
- qib_init_qpn_table(dd, &dev->qpn_table);
+ setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
+
+ qpt_mask = dd->qpn_mask;
- /*
- * The top ib_qib_lkey_table_size bits are used to index the
- * table. The lower 8 bits can be owned by the user (copied from
- * the LKEY). The remaining bits act as a generation number or tag.
- */
- spin_lock_init(&dev->lk_table.lock);
- /* insure generation is at least 4 bits see keys.c */
- if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) {
- qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
- ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS);
- ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS;
- }
- dev->lk_table.max = 1 << ib_qib_lkey_table_size;
- lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
- dev->lk_table.table = (struct qib_mregion __rcu **)
- vmalloc(lk_tab_size);
- if (dev->lk_table.table == NULL) {
- ret = -ENOMEM;
- goto err_lk;
- }
- RCU_INIT_POINTER(dev->dma_mr, NULL);
- for (i = 0; i < dev->lk_table.max; i++)
- RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
- INIT_LIST_HEAD(&dev->pending_mmaps);
- spin_lock_init(&dev->pending_lock);
- dev->mmap_offset = PAGE_SIZE;
- spin_lock_init(&dev->mmap_offset_lock);
INIT_LIST_HEAD(&dev->piowait);
INIT_LIST_HEAD(&dev->dmawait);
INIT_LIST_HEAD(&dev->txwait);
@@ -2194,110 +1647,91 @@ int qib_register_ib_device(struct qib_devdata *dd)
strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
ibdev->owner = THIS_MODULE;
ibdev->node_guid = ppd->guid;
- ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
- ibdev->uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
- (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
- ibdev->node_type = RDMA_NODE_IB_CA;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->num_comp_vectors = 1;
ibdev->dma_device = &dd->pcidev->dev;
- ibdev->query_device = qib_query_device;
ibdev->modify_device = qib_modify_device;
- ibdev->query_port = qib_query_port;
- ibdev->modify_port = qib_modify_port;
- ibdev->query_pkey = qib_query_pkey;
- ibdev->query_gid = qib_query_gid;
- ibdev->alloc_ucontext = qib_alloc_ucontext;
- ibdev->dealloc_ucontext = qib_dealloc_ucontext;
- ibdev->alloc_pd = qib_alloc_pd;
- ibdev->dealloc_pd = qib_dealloc_pd;
- ibdev->create_ah = qib_create_ah;
- ibdev->destroy_ah = qib_destroy_ah;
- ibdev->modify_ah = qib_modify_ah;
- ibdev->query_ah = qib_query_ah;
- ibdev->create_srq = qib_create_srq;
- ibdev->modify_srq = qib_modify_srq;
- ibdev->query_srq = qib_query_srq;
- ibdev->destroy_srq = qib_destroy_srq;
- ibdev->create_qp = qib_create_qp;
- ibdev->modify_qp = qib_modify_qp;
- ibdev->query_qp = qib_query_qp;
- ibdev->destroy_qp = qib_destroy_qp;
- ibdev->post_send = qib_post_send;
- ibdev->post_recv = qib_post_receive;
- ibdev->post_srq_recv = qib_post_srq_receive;
- ibdev->create_cq = qib_create_cq;
- ibdev->destroy_cq = qib_destroy_cq;
- ibdev->resize_cq = qib_resize_cq;
- ibdev->poll_cq = qib_poll_cq;
- ibdev->req_notify_cq = qib_req_notify_cq;
- ibdev->get_dma_mr = qib_get_dma_mr;
- ibdev->reg_user_mr = qib_reg_user_mr;
- ibdev->dereg_mr = qib_dereg_mr;
- ibdev->alloc_mr = qib_alloc_mr;
- ibdev->map_mr_sg = qib_map_mr_sg;
- ibdev->alloc_fmr = qib_alloc_fmr;
- ibdev->map_phys_fmr = qib_map_phys_fmr;
- ibdev->unmap_fmr = qib_unmap_fmr;
- ibdev->dealloc_fmr = qib_dealloc_fmr;
- ibdev->attach_mcast = qib_multicast_attach;
- ibdev->detach_mcast = qib_multicast_detach;
ibdev->process_mad = qib_process_mad;
- ibdev->mmap = qib_mmap;
- ibdev->dma_ops = &qib_dma_mapping_ops;
- ibdev->get_port_immutable = qib_port_immutable;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
"Intel Infiniband HCA %s", init_utsname()->nodename);
- ret = ib_register_device(ibdev, qib_create_port_files);
- if (ret)
- goto err_reg;
+ /*
+ * Fill in rvt info object.
+ */
+ dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
+ dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
+ dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
+ dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
+ dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
+ dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
+ dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
+ dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
+ dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
+ dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
+ dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
+ dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
+ dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
+ dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
+ dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
+ dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
+ dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
+ dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
+ dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
+ dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
+ dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
+ dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
+ dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
+ qib_notify_create_mad_agent;
+ dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
+ qib_notify_free_mad_agent;
+
+ dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
+ dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
+ dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
+ dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
+ dd->verbs_dev.rdi.dparms.qpn_start = 1;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
+ dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
+ dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+ dd->verbs_dev.rdi.dparms.qos_shift = 1;
+ dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
+ dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
+ dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
+ dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+ dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
+ dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
+ dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+
+ snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+ sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+ "qib_cq%d", dd->unit);
+
+ qib_fill_device_attr(dd);
+
+ ppd = dd->pport;
+ for (i = 0; i < dd->num_pports; i++, ppd++) {
+ ctxt = ppd->hw_pidx;
+ rvt_init_port(&dd->verbs_dev.rdi,
+ &ppd->ibport_data.rvp,
+ i,
+ dd->rcd[ctxt]->pkeys);
+ }
- ret = qib_create_agents(dev);
+ ret = rvt_register_device(&dd->verbs_dev.rdi);
if (ret)
- goto err_agents;
+ goto err_tx;
ret = qib_verbs_register_sysfs(dd);
if (ret)
goto err_class;
- goto bail;
+ return ret;
err_class:
- qib_free_agents(dev);
-err_agents:
- ib_unregister_device(ibdev);
-err_reg:
+ rvt_unregister_device(&dd->verbs_dev.rdi);
err_tx:
while (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
@@ -2313,27 +1747,17 @@ err_tx:
sizeof(struct qib_pio_header),
dev->pio_hdrs, dev->pio_hdrs_phys);
err_hdrs:
- vfree(dev->lk_table.table);
-err_lk:
- kfree(dev->qp_table);
-err_qpt:
qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
return ret;
}
void qib_unregister_ib_device(struct qib_devdata *dd)
{
struct qib_ibdev *dev = &dd->verbs_dev;
- struct ib_device *ibdev = &dev->ibdev;
- u32 qps_inuse;
- unsigned lk_tab_size;
qib_verbs_unregister_sysfs(dd);
- qib_free_agents(dev);
-
- ib_unregister_device(ibdev);
+ rvt_unregister_device(&dd->verbs_dev.rdi);
if (!list_empty(&dev->piowait))
qib_dev_err(dd, "piowait list not empty!\n");
@@ -2343,16 +1767,8 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
qib_dev_err(dd, "txwait list not empty!\n");
if (!list_empty(&dev->memwait))
qib_dev_err(dd, "memwait list not empty!\n");
- if (dev->dma_mr)
- qib_dev_err(dd, "DMA MR not NULL!\n");
-
- qps_inuse = qib_free_all_qps(dd);
- if (qps_inuse)
- qib_dev_err(dd, "QP memory leak! %u still in use\n",
- qps_inuse);
del_timer_sync(&dev->mem_timer);
- qib_free_qpn_table(&dev->qpn_table);
while (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
struct qib_verbs_txreq *tx;
@@ -2366,21 +1782,36 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
dd->pport->sdma_descq_cnt *
sizeof(struct qib_pio_header),
dev->pio_hdrs, dev->pio_hdrs_phys);
- lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
- vfree(dev->lk_table.table);
- kfree(dev->qp_table);
}
-/*
- * This must be called with s_lock held.
+/**
+ * _qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules progress w/o regard to the s_flags.
+ *
+ * It is only used in post send, which doesn't hold
+ * the s_lock.
*/
-void qib_schedule_send(struct qib_qp *qp)
+void _qib_schedule_send(struct rvt_qp *qp)
{
- if (qib_send_ok(qp)) {
- struct qib_ibport *ibp =
- to_iport(qp->ibqp.device, qp->port_num);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_qp_priv *priv = qp->priv;
- queue_work(ppd->qib_wq, &qp->s_work);
- }
+ queue_work(ppd->qib_wq, &priv->s_work);
+}
+
+/**
+ * qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules qp progress. The s_lock
+ * should be held.
+ */
+void qib_schedule_send(struct rvt_qp *qp)
+{
+ if (qib_send_ok(qp))
+ _qib_schedule_send(qp);
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 6c5e77753d85..4b76a8d59337 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -45,6 +45,8 @@
#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
struct qib_ctxtdata;
struct qib_pportdata;
@@ -53,9 +55,7 @@ struct qib_verbs_txreq;
#define QIB_MAX_RDMA_ATOMIC 16
#define QIB_GUIDS_PER_PORT 5
-
-#define QPN_MAX (1 << 24)
-#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define QIB_PSN_SHIFT 8
/*
* Increment this value if any changes that break userspace ABI
@@ -63,12 +63,6 @@ struct qib_verbs_txreq;
*/
#define QIB_UVERBS_ABI_VERSION 2
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
-
#define IB_SEQ_NAK (3 << 29)
/* AETH NAK opcode values */
@@ -79,17 +73,6 @@ struct qib_verbs_txreq;
#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
#define IB_NAK_INVALID_RD_REQUEST 0x64
-/* Flags for checking QP state (see ib_qib_state_ops[]) */
-#define QIB_POST_SEND_OK 0x01
-#define QIB_POST_RECV_OK 0x02
-#define QIB_PROCESS_RECV_OK 0x04
-#define QIB_PROCESS_SEND_OK 0x08
-#define QIB_PROCESS_NEXT_SEND_OK 0x10
-#define QIB_FLUSH_SEND 0x20
-#define QIB_FLUSH_RECV 0x40
-#define QIB_PROCESS_OR_FLUSH_SEND \
- (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -203,468 +186,21 @@ struct qib_pio_header {
} __packed;
/*
- * There is one struct qib_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct qib_mcast_qp.
+ * qib specific data structure that will be hidden from rvt after the queue pair
+ * is made common.
*/
-struct qib_mcast_qp {
- struct list_head list;
- struct qib_qp *qp;
-};
-
-struct qib_mcast {
- struct rb_node rb_node;
- union ib_gid mgid;
- struct list_head qp_list;
- wait_queue_head_t wait;
- atomic_t refcount;
- int n_attached;
-};
-
-/* Protection domain */
-struct qib_pd {
- struct ib_pd ibpd;
- int user; /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct qib_ah {
- struct ib_ah ibah;
- struct ib_ah_attr attr;
- atomic_t refcount;
-};
-
-/*
- * This structure is used by qib_mmap() to validate an offset
- * when an mmap() request is made. The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct qib_mmap_info {
- struct list_head pending_mmaps;
- struct ib_ucontext *context;
- void *obj;
- __u64 offset;
- struct kref ref;
- unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct qib_cq_wc {
- u32 head; /* index of next entry to fill */
- u32 tail; /* index of next ib_poll_cq() entry */
- union {
- /* these are actually size ibcq.cqe + 1 */
- struct ib_uverbs_wc uqueue[0];
- struct ib_wc kqueue[0];
- };
-};
-
-/*
- * The completion queue structure.
- */
-struct qib_cq {
- struct ib_cq ibcq;
- struct kthread_work comptask;
- struct qib_devdata *dd;
- spinlock_t lock; /* protect changes in this struct */
- u8 notify;
- u8 triggered;
- struct qib_cq_wc *queue;
- struct qib_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct qib_seg {
- void *vaddr;
- size_t length;
-};
-
-/* The number of qib_segs that fit in a page. */
-#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg))
-
-struct qib_segarray {
- struct qib_seg segs[QIB_SEGSZ];
-};
-
-struct qib_mregion {
- struct ib_pd *pd; /* shares refcnt of ibmr.pd */
- u64 user_base; /* User's address for this region */
- u64 iova; /* IB start address of this region */
- size_t length;
- u32 lkey;
- u32 offset; /* offset (bytes) to start of region */
- int access_flags;
- u32 max_segs; /* number of qib_segs in all the arrays */
- u32 mapsz; /* size of the map array */
- u8 page_shift; /* 0 - non unform/non powerof2 sizes */
- u8 lkey_published; /* in global table */
- struct completion comp; /* complete when refcount goes to zero */
- struct rcu_head list;
- atomic_t refcount;
- struct qib_segarray *map[0]; /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct qib_sge {
- struct qib_mregion *mr;
- void *vaddr; /* kernel virtual address of segment */
- u32 sge_length; /* length of the SGE */
- u32 length; /* remaining length of the segment */
- u16 m; /* current index: mr->map[m] */
- u16 n; /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct qib_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- u64 *pages;
- u32 npages;
- struct qib_mregion mr; /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct qib_swqe {
- union {
- struct ib_send_wr wr; /* don't use wr.sg_list */
- struct ib_ud_wr ud_wr;
- struct ib_reg_wr reg_wr;
- struct ib_rdma_wr rdma_wr;
- struct ib_atomic_wr atomic_wr;
- };
- u32 psn; /* first packet sequence number */
- u32 lpsn; /* last packet sequence number */
- u32 ssn; /* send sequence number */
- u32 length; /* total length of data in sg_list */
- struct qib_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct qib_rwqe {
- u64 wr_id;
- u8 num_sge;
- struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct qib_rwq {
- u32 head; /* new work requests posted to the head */
- u32 tail; /* receives pull requests from here. */
- struct qib_rwqe wq[0];
-};
-
-struct qib_rq {
- struct qib_rwq *wq;
- u32 size; /* size of RWQE array */
- u8 max_sge;
- spinlock_t lock /* protect changes in this struct */
- ____cacheline_aligned_in_smp;
-};
-
-struct qib_srq {
- struct ib_srq ibsrq;
- struct qib_rq rq;
- struct qib_mmap_info *ip;
- /* send signal when number of RWQEs < limit */
- u32 limit;
-};
-
-struct qib_sge_state {
- struct qib_sge *sg_list; /* next SGE to be used if any */
- struct qib_sge sge; /* progress state for the current SGE */
- u32 total_len;
- u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct qib_ack_entry {
- u8 opcode;
- u8 sent;
- u32 psn;
- u32 lpsn;
- union {
- struct qib_sge rdma_sge;
- u64 atomic_data;
- };
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct qib_qp {
- struct ib_qp ibqp;
- /* read mostly fields above and below */
- struct ib_ah_attr remote_ah_attr;
- struct ib_ah_attr alt_ah_attr;
- struct qib_qp __rcu *next; /* link list for QPN hash table */
- struct qib_swqe *s_wq; /* send work queue */
- struct qib_mmap_info *ip;
- struct qib_ib_header *s_hdr; /* next packet header to send */
- unsigned long timeout_jiffies; /* computed from timeout */
-
- enum ib_mtu path_mtu;
- u32 remote_qpn;
- u32 pmtu; /* decoded from path_mtu */
- u32 qkey; /* QKEY for this QP (for UD or RD) */
- u32 s_size; /* send work queue size */
- u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
-
- u8 state; /* QP state */
- u8 qp_access_flags;
- u8 alt_timeout; /* Alternate path timeout for this QP */
- u8 timeout; /* Timeout for this QP */
- u8 s_srate;
- u8 s_mig_state;
- u8 port_num;
- u8 s_pkey_index; /* PKEY index to use */
- u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
- u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
- u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
- u8 s_retry_cnt; /* number of times to retry */
- u8 s_rnr_retry_cnt;
- u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 s_max_sge; /* size of s_wq->sg_list */
- u8 s_draining;
-
- /* start of read/write fields */
-
- atomic_t refcount ____cacheline_aligned_in_smp;
- wait_queue_head_t wait;
-
-
- struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
- ____cacheline_aligned_in_smp;
- struct qib_sge_state s_rdma_read_sge;
-
- spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
- unsigned long r_aflags;
- u64 r_wr_id; /* ID for current receive WQE */
- u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
- u32 r_len; /* total length of r_sge */
- u32 r_rcv_len; /* receive data len processed */
- u32 r_psn; /* expected rcv packet sequence number */
- u32 r_msn; /* message sequence number */
-
- u8 r_state; /* opcode of last packet received */
- u8 r_flags;
- u8 r_head_ack_queue; /* index into s_ack_queue[] */
-
- struct list_head rspwait; /* link for waititing to respond */
-
- struct qib_sge_state r_sge; /* current receive data */
- struct qib_rq r_rq; /* receive work queue */
-
- spinlock_t s_lock ____cacheline_aligned_in_smp;
- struct qib_sge_state *s_cur_sge;
- u32 s_flags;
- struct qib_verbs_txreq *s_tx;
- struct qib_swqe *s_wqe;
- struct qib_sge_state s_sge; /* current send request data */
- struct qib_mregion *s_rdma_mr;
- atomic_t s_dma_busy;
- u32 s_cur_size; /* size of send packet in bytes */
- u32 s_len; /* total length of s_sge */
- u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
- u32 s_next_psn; /* PSN for next request */
- u32 s_last_psn; /* last response PSN processed */
- u32 s_sending_psn; /* lowest PSN that is being sent */
- u32 s_sending_hpsn; /* highest PSN that is being sent */
- u32 s_psn; /* current packet sequence number */
- u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
- u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
- u32 s_head; /* new entries added here */
- u32 s_tail; /* next entry to process */
- u32 s_cur; /* current work queue entry */
- u32 s_acked; /* last un-ACK'ed entry */
- u32 s_last; /* last completed entry */
- u32 s_ssn; /* SSN of tail entry */
- u32 s_lsn; /* limit sequence number (credit) */
- u16 s_hdrwords; /* size of s_hdr in 32 bit words */
- u16 s_rdma_ack_cnt;
- u8 s_state; /* opcode of last packet sent */
- u8 s_ack_state; /* opcode of packet to ACK */
- u8 s_nak_state; /* non-zero if NAK is pending */
- u8 r_nak_state; /* non-zero if NAK is pending */
- u8 s_retry; /* requester retry counter */
- u8 s_rnr_retry; /* requester RNR retry counter */
- u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
- u8 s_tail_ack_queue; /* index into s_ack_queue[] */
-
- struct qib_sge_state s_ack_rdma_sge;
- struct timer_list s_timer;
+struct qib_qp_priv {
+ struct qib_ib_header *s_hdr; /* next packet header to send */
struct list_head iowait; /* link for wait PIO buf */
-
+ atomic_t s_dma_busy;
+ struct qib_verbs_txreq *s_tx;
struct work_struct s_work;
-
wait_queue_head_t wait_dma;
-
- struct qib_sge r_sg_list[0] /* verified SGEs */
- ____cacheline_aligned_in_smp;
+ struct rvt_qp *owner;
};
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define QIB_R_WRID_VALID 0
-#define QIB_R_REWIND_SGE 1
-
-/*
- * Bit definitions for r_flags.
- */
-#define QIB_R_REUSE_SGE 0x01
-#define QIB_R_RDMAR_SEQ 0x02
-#define QIB_R_RSP_NAK 0x04
-#define QIB_R_RSP_SEND 0x08
-#define QIB_R_COMM_EST 0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * QIB_S_BUSY - send tasklet is processing the QP
- * QIB_S_TIMER - the RC retry timer is active
- * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- * before processing the next SWQE
- * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- * before processing the next SWQE
- * QIB_S_WAIT_RNR - waiting for RNR timeout
- * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- * next send completion entry not via send DMA
- * QIB_S_WAIT_PIO - waiting for a send buffer to be available
- * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
- * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
- * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- */
-#define QIB_S_SIGNAL_REQ_WR 0x0001
-#define QIB_S_BUSY 0x0002
-#define QIB_S_TIMER 0x0004
-#define QIB_S_RESP_PENDING 0x0008
-#define QIB_S_ACK_PENDING 0x0010
-#define QIB_S_WAIT_FENCE 0x0020
-#define QIB_S_WAIT_RDMAR 0x0040
-#define QIB_S_WAIT_RNR 0x0080
-#define QIB_S_WAIT_SSN_CREDIT 0x0100
-#define QIB_S_WAIT_DMA 0x0200
-#define QIB_S_WAIT_PIO 0x0400
-#define QIB_S_WAIT_TX 0x0800
-#define QIB_S_WAIT_DMA_DESC 0x1000
-#define QIB_S_WAIT_KMEM 0x2000
-#define QIB_S_WAIT_PSN 0x4000
-#define QIB_S_WAIT_ACK 0x8000
-#define QIB_S_SEND_ONE 0x10000
-#define QIB_S_UNLIMITED_CREDIT 0x20000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
- QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
- QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
- QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
-
-#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
-
#define QIB_PSN_CREDIT 16
-/*
- * Since struct qib_swqe is not a fixed size, we can't simply index into
- * struct qib_qp.s_wq. This function does the array index computation.
- */
-static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
- unsigned n)
-{
- return (struct qib_swqe *)((char *)qp->s_wq +
- (sizeof(struct qib_swqe) +
- qp->s_max_sge *
- sizeof(struct qib_sge)) * n);
-}
-
-/*
- * Since struct qib_rwqe is not a fixed size, we can't simply index into
- * struct qib_rwq.wq. This function does the array index computation.
- */
-static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
-{
- return (struct qib_rwqe *)
- ((char *) rq->wq->wq +
- (sizeof(struct qib_rwqe) +
- rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
- void *page;
-};
-
-struct qib_qpn_table {
- spinlock_t lock; /* protect changes in this struct */
- unsigned flags; /* flags for QP0/1 allocated for each port */
- u32 last; /* last QP number allocated */
- u32 nmaps; /* size of the map table */
- u16 limit;
- u16 mask;
- /* bit map of free QP numbers other than 0/1 */
- struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct qib_lkey_table {
- spinlock_t lock; /* protect changes in this struct */
- u32 next; /* next unused index (speeds search) */
- u32 gen; /* generation count */
- u32 max; /* size of the table */
- struct qib_mregion __rcu **table;
-};
-
struct qib_opcode_stats {
u64 n_packets; /* number of packets */
u64 n_bytes; /* total number of bytes */
@@ -682,21 +218,9 @@ struct qib_pma_counters {
};
struct qib_ibport {
- struct qib_qp __rcu *qp0;
- struct qib_qp __rcu *qp1;
- struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
- struct qib_ah *sm_ah;
- struct qib_ah *smi_ah;
- struct rb_root mcast_tree;
- spinlock_t lock; /* protect changes in this struct */
-
- /* non-zero when timer is set */
- unsigned long mkey_lease_timeout;
- unsigned long trap_timeout;
- __be64 gid_prefix; /* in network order */
- __be64 mkey;
+ struct rvt_ibport rvp;
+ struct rvt_ah *smi_ah;
__be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
- u64 tid; /* TID for traps */
struct qib_pma_counters __percpu *pmastats;
u64 z_unicast_xmit; /* starting count for PMA */
u64 z_unicast_rcv; /* starting count for PMA */
@@ -715,82 +239,25 @@ struct qib_ibport {
u32 z_local_link_integrity_errors; /* starting count for PMA */
u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
u32 z_vl15_dropped; /* starting count for PMA */
- u32 n_rc_resends;
- u32 n_rc_acks;
- u32 n_rc_qacks;
- u32 n_rc_delayed_comp;
- u32 n_seq_naks;
- u32 n_rdma_seq;
- u32 n_rnr_naks;
- u32 n_other_naks;
- u32 n_loop_pkts;
- u32 n_pkt_drops;
- u32 n_vl15_dropped;
- u32 n_rc_timeouts;
- u32 n_dmawait;
- u32 n_unaligned;
- u32 n_rc_dupreq;
- u32 n_rc_seqnak;
- u32 port_cap_flags;
- u32 pma_sample_start;
- u32 pma_sample_interval;
- __be16 pma_counter_select[5];
- u16 pma_tag;
- u16 pkey_violations;
- u16 qkey_violations;
- u16 mkey_violations;
- u16 mkey_lease_period;
- u16 sm_lid;
- u16 repress_traps;
- u8 sm_sl;
- u8 mkeyprot;
- u8 subnet_timeout;
- u8 vl_high_limit;
u8 sl_to_vl[16];
-
};
-
struct qib_ibdev {
- struct ib_device ibdev;
- struct list_head pending_mmaps;
- spinlock_t mmap_offset_lock; /* protect mmap_offset */
- u32 mmap_offset;
- struct qib_mregion __rcu *dma_mr;
-
- /* QP numbers are shared by all IB ports */
- struct qib_qpn_table qpn_table;
- struct qib_lkey_table lk_table;
+ struct rvt_dev_info rdi;
+
struct list_head piowait; /* list for wait PIO buf */
struct list_head dmawait; /* list for wait DMA */
struct list_head txwait; /* list for wait qib_verbs_txreq */
struct list_head memwait; /* list for wait kernel memory */
struct list_head txreq_free;
struct timer_list mem_timer;
- struct qib_qp __rcu **qp_table;
struct qib_pio_header *pio_hdrs;
dma_addr_t pio_hdrs_phys;
- /* list of QPs waiting for RNR timer */
- spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
- u32 qp_table_size; /* size of the hash table */
u32 qp_rnd; /* random bytes for hash */
- spinlock_t qpt_lock;
u32 n_piowait;
u32 n_txwait;
- u32 n_pds_allocated; /* number of PDs allocated for device */
- spinlock_t n_pds_lock;
- u32 n_ahs_allocated; /* number of AHs allocated for device */
- spinlock_t n_ahs_lock;
- u32 n_cqs_allocated; /* number of CQs allocated for device */
- spinlock_t n_cqs_lock;
- u32 n_qps_allocated; /* number of QPs allocated for device */
- spinlock_t n_qps_lock;
- u32 n_srqs_allocated; /* number of SRQs allocated for device */
- spinlock_t n_srqs_lock;
- u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
- spinlock_t n_mcast_grps_lock;
#ifdef CONFIG_DEBUG_FS
/* per HCA debugfs */
struct dentry *qib_ibdev_dbg;
@@ -813,56 +280,27 @@ struct qib_verbs_counters {
u32 vl15_dropped;
};
-static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct qib_mr, ibmr);
-}
-
-static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct qib_pd, ibpd);
-}
-
-static inline struct qib_ah *to_iah(struct ib_ah *ibah)
-{
- return container_of(ibah, struct qib_ah, ibah);
-}
-
-static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct qib_cq, ibcq);
-}
-
-static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
-{
- return container_of(ibsrq, struct qib_srq, ibsrq);
-}
-
-static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct qib_qp, ibqp);
-}
-
static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
{
- return container_of(ibdev, struct qib_ibdev, ibdev);
+ struct rvt_dev_info *rdi;
+
+ rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+ return container_of(rdi, struct qib_ibdev, rdi);
}
/*
* Send if not busy or waiting for I/O and either
* a RC response is pending or we can process send work requests.
*/
-static inline int qib_send_ok(struct qib_qp *qp)
+static inline int qib_send_ok(struct rvt_qp *qp)
{
- return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
- (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
- !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+ return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+ (qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
-/*
- * This must be called with s_lock held.
- */
-void qib_schedule_send(struct qib_qp *qp);
+void _qib_schedule_send(struct rvt_qp *qp);
+void qib_schedule_send(struct rvt_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
{
@@ -878,7 +316,7 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
-void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void qib_sys_guid_chg(struct qib_ibport *ibp);
void qib_node_desc_chg(struct qib_ibport *ibp);
int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -886,8 +324,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_mad_hdr *in, size_t in_mad_size,
struct ib_mad_hdr *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
-int qib_create_agents(struct qib_ibdev *dev);
-void qib_free_agents(struct qib_ibdev *dev);
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx);
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx);
/*
* Compare the lower 24 bits of the two values.
@@ -898,8 +336,6 @@ static inline int qib_cmp24(u32 a, u32 b)
return (((int) a) - ((int) b)) << 8;
}
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
-
int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
u64 *rwords, u64 *spkts, u64 *rpkts,
u64 *xmit_wait);
@@ -907,35 +343,17 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
int qib_get_counters(struct qib_pportdata *ppd,
struct qib_verbs_counters *cntrs);
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp);
-
-__be32 qib_compute_aeth(struct qib_qp *qp);
-
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
-
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-
-int qib_destroy_qp(struct ib_qp *ibqp);
-
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
-
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata);
-
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned qib_free_all_qps(struct qib_devdata *dd);
+__be32 qib_compute_aeth(struct rvt_qp *qp);
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
-
-void qib_free_qpn_table(struct qib_qpn_table *qpt);
+/*
+ * Functions provided by qib driver for rdmavt to use
+ */
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+void qib_notify_qp_reset(struct rvt_qp *qp);
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+ enum ib_qp_type type, u8 port, gfp_t gfp);
#ifdef CONFIG_DEBUG_FS
@@ -949,7 +367,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
#endif
-void qib_get_credit(struct qib_qp *qp, u32 aeth);
+void qib_get_credit(struct rvt_qp *qp, u32 aeth);
unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -957,166 +375,66 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
void qib_put_txreq(struct qib_verbs_txreq *tx);
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
- u32 hdrwords, struct qib_sge_state *ss, u32 len);
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct rvt_sge_state *ss, u32 len);
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
int release);
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
void qib_rc_rnr_retry(unsigned long arg);
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
-int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
- int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-
-void qib_free_lkey(struct qib_mregion *mr);
-
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
- struct qib_sge *isge, struct ib_sge *sge, int acc);
-
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
- u32 len, u64 vaddr, u32 rkey, int acc);
-
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
-
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask,
- struct ib_udata *udata);
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int qib_destroy_srq(struct ib_srq *ibsrq);
-
-int qib_cq_init(struct qib_devdata *dd);
-
-void qib_cq_exit(struct qib_devdata *dd);
-
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
-
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-
-int qib_destroy_cq(struct ib_cq *ibcq);
-
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_udata *udata);
-
-int qib_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_entries);
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
-
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
-
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova);
-
-int qib_unmap_fmr(struct list_head *fmr_list);
-
-int qib_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void qib_get_mr(struct qib_mregion *mr)
-{
- atomic_inc(&mr->refcount);
-}
+ int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
void mr_rcu_callback(struct rcu_head *list);
-static inline void qib_put_mr(struct qib_mregion *mr)
-{
- if (unlikely(atomic_dec_and_test(&mr->refcount)))
- call_rcu(&mr->list, mr_rcu_callback);
-}
-
-static inline void qib_put_ss(struct qib_sge_state *ss)
-{
- while (ss->num_sge) {
- qib_put_mr(ss->sge.mr);
- if (--ss->num_sge)
- ss->sge = *ss->sg_list++;
- }
-}
-
-
-void qib_release_mmap_info(struct kref *ref);
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
- struct ib_ucontext *context,
- void *obj);
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
- u32 size, void *obj);
-
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
-
-void qib_migrate_qp(struct qib_qp *qp);
+void qib_migrate_qp(struct rvt_qp *qp);
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
- int has_grh, struct qib_qp *qp, u32 bth0);
+ int has_grh, struct rvt_qp *qp, u32 bth0);
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
u32 bth0, u32 bth2);
-void qib_do_send(struct work_struct *work);
+void _qib_do_send(struct work_struct *work);
+
+void qib_do_send(struct rvt_qp *qp);
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
-void qib_send_rc_ack(struct qib_qp *qp);
+void qib_send_rc_ack(struct rvt_qp *qp);
-int qib_make_rc_req(struct qib_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp);
-int qib_make_uc_req(struct qib_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp);
-int qib_make_ud_req(struct qib_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp);
int qib_register_ib_device(struct qib_devdata *);
@@ -1150,11 +468,11 @@ extern const enum ib_wc_opcode ib_qib_wc_opcode[];
#define IB_PHYSPORTSTATE_CFG_ENH 0x10
#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
-extern const int ib_qib_state_ops[];
+extern const int ib_rvt_state_ops[];
extern __be64 ib_qib_sys_image_guid; /* in network order */
-extern unsigned int ib_qib_lkey_table_size;
+extern unsigned int ib_rvt_lkey_table_size;
extern unsigned int ib_qib_max_cqes;
@@ -1178,6 +496,4 @@ extern unsigned int ib_qib_max_srq_wrs;
extern const u32 ib_qib_rnr_table[];
-extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
-
#endif /* QIB_VERBS_H */
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
deleted file mode 100644
index b2fb5286dbd9..000000000000
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-
-#include "qib.h"
-
-/**
- * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
-{
- struct qib_mcast_qp *mqp;
-
- mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
- if (!mqp)
- goto bail;
-
- mqp->qp = qp;
- atomic_inc(&qp->refcount);
-
-bail:
- return mqp;
-}
-
-static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
-{
- struct qib_qp *qp = mqp->qp;
-
- /* Notify qib_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
-
- kfree(mqp);
-}
-
-/**
- * qib_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
-{
- struct qib_mcast *mcast;
-
- mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
- if (!mcast)
- goto bail;
-
- mcast->mgid = *mgid;
- INIT_LIST_HEAD(&mcast->qp_list);
- init_waitqueue_head(&mcast->wait);
- atomic_set(&mcast->refcount, 0);
- mcast->n_attached = 0;
-
-bail:
- return mcast;
-}
-
-static void qib_mcast_free(struct qib_mcast *mcast)
-{
- struct qib_mcast_qp *p, *tmp;
-
- list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
- qib_mcast_qp_free(p);
-
- kfree(mcast);
-}
-
-/**
- * qib_mcast_find - search the global table for the given multicast GID
- * @ibp: the IB port structure
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
-{
- struct rb_node *n;
- unsigned long flags;
- struct qib_mcast *mcast;
-
- spin_lock_irqsave(&ibp->lock, flags);
- n = ibp->mcast_tree.rb_node;
- while (n) {
- int ret;
-
- mcast = rb_entry(n, struct qib_mcast, rb_node);
-
- ret = memcmp(mgid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
- if (ret < 0)
- n = n->rb_left;
- else if (ret > 0)
- n = n->rb_right;
- else {
- atomic_inc(&mcast->refcount);
- spin_unlock_irqrestore(&ibp->lock, flags);
- goto bail;
- }
- }
- spin_unlock_irqrestore(&ibp->lock, flags);
-
- mcast = NULL;
-
-bail:
- return mcast;
-}
-
-/**
- * qib_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added. Return EEXIST if the GID was already in
- * the table but the QP was added. Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
- struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
-{
- struct rb_node **n = &ibp->mcast_tree.rb_node;
- struct rb_node *pn = NULL;
- int ret;
-
- spin_lock_irq(&ibp->lock);
-
- while (*n) {
- struct qib_mcast *tmcast;
- struct qib_mcast_qp *p;
-
- pn = *n;
- tmcast = rb_entry(pn, struct qib_mcast, rb_node);
-
- ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
- sizeof(union ib_gid));
- if (ret < 0) {
- n = &pn->rb_left;
- continue;
- }
- if (ret > 0) {
- n = &pn->rb_right;
- continue;
- }
-
- /* Search the QP list to see if this is already there. */
- list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
- if (p->qp == mqp->qp) {
- ret = ESRCH;
- goto bail;
- }
- }
- if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
- ret = ENOMEM;
- goto bail;
- }
-
- tmcast->n_attached++;
-
- list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
- ret = EEXIST;
- goto bail;
- }
-
- spin_lock(&dev->n_mcast_grps_lock);
- if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
- spin_unlock(&dev->n_mcast_grps_lock);
- ret = ENOMEM;
- goto bail;
- }
-
- dev->n_mcast_grps_allocated++;
- spin_unlock(&dev->n_mcast_grps_lock);
-
- mcast->n_attached++;
-
- list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
- atomic_inc(&mcast->refcount);
- rb_link_node(&mcast->rb_node, pn, n);
- rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
-
- ret = 0;
-
-bail:
- spin_unlock_irq(&ibp->lock);
-
- return ret;
-}
-
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- struct qib_qp *qp = to_iqp(ibqp);
- struct qib_ibdev *dev = to_idev(ibqp->device);
- struct qib_ibport *ibp;
- struct qib_mcast *mcast;
- struct qib_mcast_qp *mqp;
- int ret;
-
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
-
- /*
- * Allocate data structures since its better to do this outside of
- * spin locks and it will most likely be needed.
- */
- mcast = qib_mcast_alloc(gid);
- if (mcast == NULL) {
- ret = -ENOMEM;
- goto bail;
- }
- mqp = qib_mcast_qp_alloc(qp);
- if (mqp == NULL) {
- qib_mcast_free(mcast);
- ret = -ENOMEM;
- goto bail;
- }
- ibp = to_iport(ibqp->device, qp->port_num);
- switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
- case ESRCH:
- /* Neither was used: OK to attach the same QP twice. */
- qib_mcast_qp_free(mqp);
- qib_mcast_free(mcast);
- break;
-
- case EEXIST: /* The mcast wasn't used */
- qib_mcast_free(mcast);
- break;
-
- case ENOMEM:
- /* Exceeded the maximum number of mcast groups. */
- qib_mcast_qp_free(mqp);
- qib_mcast_free(mcast);
- ret = -ENOMEM;
- goto bail;
-
- default:
- break;
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- struct qib_qp *qp = to_iqp(ibqp);
- struct qib_ibdev *dev = to_idev(ibqp->device);
- struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
- struct qib_mcast *mcast = NULL;
- struct qib_mcast_qp *p, *tmp, *delp = NULL;
- struct rb_node *n;
- int last = 0;
- int ret;
-
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
- return -EINVAL;
-
- spin_lock_irq(&ibp->lock);
-
- /* Find the GID in the mcast table. */
- n = ibp->mcast_tree.rb_node;
- while (1) {
- if (n == NULL) {
- spin_unlock_irq(&ibp->lock);
- return -EINVAL;
- }
-
- mcast = rb_entry(n, struct qib_mcast, rb_node);
- ret = memcmp(gid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
- if (ret < 0)
- n = n->rb_left;
- else if (ret > 0)
- n = n->rb_right;
- else
- break;
- }
-
- /* Search the QP list. */
- list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
- if (p->qp != qp)
- continue;
- /*
- * We found it, so remove it, but don't poison the forward
- * link until we are sure there are no list walkers.
- */
- list_del_rcu(&p->list);
- mcast->n_attached--;
- delp = p;
-
- /* If this was the last attached QP, remove the GID too. */
- if (list_empty(&mcast->qp_list)) {
- rb_erase(&mcast->rb_node, &ibp->mcast_tree);
- last = 1;
- }
- break;
- }
-
- spin_unlock_irq(&ibp->lock);
- /* QP not attached */
- if (!delp)
- return -EINVAL;
- /*
- * Wait for any list walkers to finish before freeing the
- * list element.
- */
- wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
- qib_mcast_qp_free(delp);
-
- if (last) {
- atomic_dec(&mcast->refcount);
- wait_event(mcast->wait, !atomic_read(&mcast->refcount));
- qib_mcast_free(mcast);
- spin_lock_irq(&dev->n_mcast_grps_lock);
- dev->n_mcast_grps_allocated--;
- spin_unlock_irq(&dev->n_mcast_grps_lock);
- }
- return 0;
-}
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp)
-{
- return ibp->mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
new file mode 100644
index 000000000000..988b6a0101a4
--- /dev/null
+++ b/drivers/infiniband/sw/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
new file mode 100644
index 000000000000..11aa6a34bd71
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -0,0 +1,6 @@
+config INFINIBAND_RDMAVT
+ tristate "RDMA verbs transport library"
+ depends on 64BIT
+ default m
+ ---help---
+ This is a common software verbs provider for RDMA networks.
diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile
new file mode 100644
index 000000000000..ccaa7992ac97
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/Makefile
@@ -0,0 +1,13 @@
+#
+# rdmavt driver
+#
+#
+#
+# Called from the kernel module build system.
+#
+obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
+
+rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
+ trace.o
+
+CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
new file mode 100644
index 000000000000..16c446142c2a
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "ah.h"
+#include "vt.h" /* for prints */
+
+/**
+ * rvt_check_ah - validate the attributes of AH
+ * @ibdev: the ib device
+ * @ah_attr: the attributes of the AH
+ *
+ * If driver supports a more detailed check_ah function call back to it
+ * otherwise just check the basics.
+ *
+ * Return: 0 on success
+ */
+int rvt_check_ah(struct ib_device *ibdev,
+ struct ib_ah_attr *ah_attr)
+{
+ int err;
+ struct ib_port_attr port_attr;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ enum rdma_link_layer link = rdma_port_get_link_layer(ibdev,
+ ah_attr->port_num);
+
+ err = ib_query_port(ibdev, ah_attr->port_num, &port_attr);
+ if (err)
+ return -EINVAL;
+ if (ah_attr->port_num < 1 ||
+ ah_attr->port_num > ibdev->phys_port_cnt)
+ return -EINVAL;
+ if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+ ib_rate_to_mbps(ah_attr->static_rate) < 0)
+ return -EINVAL;
+ if ((ah_attr->ah_flags & IB_AH_GRH) &&
+ ah_attr->grh.sgid_index >= port_attr.gid_tbl_len)
+ return -EINVAL;
+ if (link != IB_LINK_LAYER_ETHERNET) {
+ if (ah_attr->dlid == 0)
+ return -EINVAL;
+ if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+ ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
+ !(ah_attr->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+ }
+ if (rdi->driver_f.check_ah)
+ return rdi->driver_f.check_ah(ibdev, ah_attr);
+ return 0;
+}
+EXPORT_SYMBOL(rvt_check_ah);
+
+/**
+ * rvt_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: newly allocated ah
+ */
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ struct rvt_ah *ah;
+ struct rvt_dev_info *dev = ib_to_rvt(pd->device);
+ unsigned long flags;
+
+ if (rvt_check_ah(pd->device, ah_attr))
+ return ERR_PTR(-EINVAL);
+
+ ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
+ spin_unlock(&dev->n_ahs_lock);
+ kfree(ah);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ dev->n_ahs_allocated++;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ ah->attr = *ah_attr;
+ atomic_set(&ah->refcount, 0);
+
+ if (dev->driver_f.notify_new_ah)
+ dev->driver_f.notify_new_ah(pd->device, ah_attr, ah);
+
+ return &ah->ibah;
+}
+
+/**
+ * rvt_destory_ah - Destory an address handle
+ * @ibah: address handle
+ *
+ * Return: 0 on success
+ */
+int rvt_destroy_ah(struct ib_ah *ibah)
+{
+ struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
+ struct rvt_ah *ah = ibah_to_rvtah(ibah);
+ unsigned long flags;
+
+ if (atomic_read(&ah->refcount) != 0)
+ return -EBUSY;
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ dev->n_ahs_allocated--;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ kfree(ah);
+
+ return 0;
+}
+
+/**
+ * rvt_modify_ah - modify an ah with given attrs
+ * @ibah: address handle to modify
+ * @ah_attr: attrs to apply
+ *
+ * Return: 0 on success
+ */
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+ if (rvt_check_ah(ibah->device, ah_attr))
+ return -EINVAL;
+
+ ah->attr = *ah_attr;
+
+ return 0;
+}
+
+/**
+ * rvt_query_ah - return attrs for ah
+ * @ibah: address handle to query
+ * @ah_attr: return info in this
+ *
+ * Return: always 0
+ */
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+ *ah_attr = ah->attr;
+
+ return 0;
+}
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
new file mode 100644
index 000000000000..e9c36be87d79
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -0,0 +1,59 @@
+#ifndef DEF_RVTAH_H
+#define DEF_RVTAH_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr);
+int rvt_destroy_ah(struct ib_ah *ibah);
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+
+#endif /* DEF_RVTAH_H */
diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 4f046ffe7e60..b1ffc8b4a6c0 100644
--- a/drivers/staging/rdma/hfi1/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -48,25 +45,23 @@
*
*/
-#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
-
-#include "verbs.h"
-#include "hfi.h"
+#include "cq.h"
+#include "vt.h"
/**
- * hfi1_cq_enter - add a new entry to the completion queue
+ * rvt_cq_enter - add a new entry to the completion queue
* @cq: completion queue
* @entry: work completion entry to add
- * @sig: true if @entry is a solicited entry
+ * @sig: true if @entry is solicited
*
* This may be called with qp->s_lock held.
*/
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
+void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
- struct hfi1_cq_wc *wc;
+ struct rvt_cq_wc *wc;
unsigned long flags;
u32 head;
u32 next;
@@ -79,11 +74,13 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
*/
wc = cq->queue;
head = wc->head;
- if (head >= (unsigned) cq->ibcq.cqe) {
+ if (head >= (unsigned)cq->ibcq.cqe) {
head = cq->ibcq.cqe;
next = 0;
- } else
+ } else {
next = head + 1;
+ }
+
if (unlikely(next == wc->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
@@ -114,8 +111,9 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
wc->uqueue[head].port_num = entry->port_num;
/* Make sure entry is written before the head index. */
smp_wmb();
- } else
+ } else {
wc->kqueue[head] = *entry;
+ }
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -126,10 +124,10 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
* This will cause send_complete() to be called in
* another thread.
*/
- smp_read_barrier_depends(); /* see hfi1_cq_exit */
- worker = cq->dd->worker;
+ smp_read_barrier_depends(); /* see rvt_cq_exit */
+ worker = cq->rdi->worker;
if (likely(worker)) {
- cq->notify = IB_CQ_NONE;
+ cq->notify = RVT_CQ_NONE;
cq->triggered++;
queue_kthread_work(worker, &cq->comptask);
}
@@ -137,59 +135,11 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
spin_unlock_irqrestore(&cq->lock, flags);
}
-
-/**
- * hfi1_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context. Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
- struct hfi1_cq *cq = to_icq(ibcq);
- struct hfi1_cq_wc *wc;
- unsigned long flags;
- int npolled;
- u32 tail;
-
- /* The kernel can only poll a kernel completion queue */
- if (cq->ip) {
- npolled = -EINVAL;
- goto bail;
- }
-
- spin_lock_irqsave(&cq->lock, flags);
-
- wc = cq->queue;
- tail = wc->tail;
- if (tail > (u32) cq->ibcq.cqe)
- tail = (u32) cq->ibcq.cqe;
- for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
- if (tail == wc->head)
- break;
- /* The kernel doesn't need a RMB since it has the lock. */
- *entry = wc->kqueue[tail];
- if (tail >= cq->ibcq.cqe)
- tail = 0;
- else
- tail++;
- }
- wc->tail = tail;
-
- spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
- return npolled;
-}
+EXPORT_SYMBOL(rvt_cq_enter);
static void send_complete(struct kthread_work *work)
{
- struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);
+ struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
/*
* The completion handler will most likely rearm the notification
@@ -217,26 +167,25 @@ static void send_complete(struct kthread_work *work)
}
/**
- * hfi1_create_cq - create a completion queue
+ * rvt_create_cq - create a completion queue
* @ibdev: the device this completion queue is attached to
* @attr: creation attributes
- * @context: unused by the driver
+ * @context: unused by the QLogic_IB driver
* @udata: user data for libibverbs.so
*
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
* Called by ib_create_cq() in the generic verbs code.
+ *
+ * Return: pointer to the completion queue or negative errno values
+ * for failure.
*/
-struct ib_cq *hfi1_create_cq(
- struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
- struct hfi1_ibdev *dev = to_idev(ibdev);
- struct hfi1_cq *cq;
- struct hfi1_cq_wc *wc;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ struct rvt_cq *cq;
+ struct rvt_cq_wc *wc;
struct ib_cq *ret;
u32 sz;
unsigned int entries = attr->cqe;
@@ -244,11 +193,11 @@ struct ib_cq *hfi1_create_cq(
if (attr->flags)
return ERR_PTR(-EINVAL);
- if (entries < 1 || entries > hfi1_max_cqes)
+ if (entries < 1 || entries > rdi->dparms.props.max_cqe)
return ERR_PTR(-EINVAL);
/* Allocate the completion queue structure. */
- cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -272,12 +221,12 @@ struct ib_cq *hfi1_create_cq(
/*
* Return the address of the WC as the offset to mmap.
- * See hfi1_mmap() for details.
+ * See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
- cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
+ cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
if (!cq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wc;
@@ -289,23 +238,22 @@ struct ib_cq *hfi1_create_cq(
ret = ERR_PTR(err);
goto bail_ip;
}
- } else
- cq->ip = NULL;
+ }
- spin_lock(&dev->n_cqs_lock);
- if (dev->n_cqs_allocated == hfi1_max_cqs) {
- spin_unlock(&dev->n_cqs_lock);
+ spin_lock(&rdi->n_cqs_lock);
+ if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
+ spin_unlock(&rdi->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
- dev->n_cqs_allocated++;
- spin_unlock(&dev->n_cqs_lock);
+ rdi->n_cqs_allocated++;
+ spin_unlock(&rdi->n_cqs_lock);
if (cq->ip) {
- spin_lock_irq(&dev->pending_lock);
- list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
+ spin_lock_irq(&rdi->pending_lock);
+ list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
+ spin_unlock_irq(&rdi->pending_lock);
}
/*
@@ -313,14 +261,11 @@ struct ib_cq *hfi1_create_cq(
* The number of entries should be >= the number requested or return
* an error.
*/
- cq->dd = dd_from_dev(dev);
+ cq->rdi = rdi;
cq->ibcq.cqe = entries;
- cq->notify = IB_CQ_NONE;
- cq->triggered = 0;
+ cq->notify = RVT_CQ_NONE;
spin_lock_init(&cq->lock);
init_kthread_work(&cq->comptask, send_complete);
- wc->head = 0;
- wc->tail = 0;
cq->queue = wc;
ret = &cq->ibcq;
@@ -338,24 +283,24 @@ done:
}
/**
- * hfi1_destroy_cq - destroy a completion queue
+ * rvt_destroy_cq - destroy a completion queue
* @ibcq: the completion queue to destroy.
*
- * Returns 0 for success.
- *
* Called by ib_destroy_cq() in the generic verbs code.
+ *
+ * Return: always 0
*/
-int hfi1_destroy_cq(struct ib_cq *ibcq)
+int rvt_destroy_cq(struct ib_cq *ibcq)
{
- struct hfi1_ibdev *dev = to_idev(ibcq->device);
- struct hfi1_cq *cq = to_icq(ibcq);
+ struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+ struct rvt_dev_info *rdi = cq->rdi;
flush_kthread_work(&cq->comptask);
- spin_lock(&dev->n_cqs_lock);
- dev->n_cqs_allocated--;
- spin_unlock(&dev->n_cqs_lock);
+ spin_lock(&rdi->n_cqs_lock);
+ rdi->n_cqs_allocated--;
+ spin_unlock(&rdi->n_cqs_lock);
if (cq->ip)
- kref_put(&cq->ip->ref, hfi1_release_mmap_info);
+ kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->queue);
kfree(cq);
@@ -364,18 +309,18 @@ int hfi1_destroy_cq(struct ib_cq *ibcq)
}
/**
- * hfi1_req_notify_cq - change the notification type for a completion queue
+ * rvt_req_notify_cq - change the notification type for a completion queue
* @ibcq: the completion queue
* @notify_flags: the type of notification to request
*
- * Returns 0 for success.
- *
* This may be called from interrupt context. Also called by
* ib_req_notify_cq() in the generic verbs code.
+ *
+ * Return: 0 for success.
*/
-int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
- struct hfi1_cq *cq = to_icq(ibcq);
+ struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
unsigned long flags;
int ret = 0;
@@ -397,24 +342,23 @@ int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
}
/**
- * hfi1_resize_cq - change the size of the CQ
+ * rvt_resize_cq - change the size of the CQ
* @ibcq: the completion queue
*
- * Returns 0 for success.
+ * Return: 0 for success.
*/
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
- struct hfi1_cq *cq = to_icq(ibcq);
- struct hfi1_cq_wc *old_wc;
- struct hfi1_cq_wc *wc;
+ struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+ struct rvt_cq_wc *old_wc;
+ struct rvt_cq_wc *wc;
u32 head, tail, n;
int ret;
u32 sz;
+ struct rvt_dev_info *rdi = cq->rdi;
- if (cqe < 1 || cqe > hfi1_max_cqes) {
- ret = -EINVAL;
- goto bail;
- }
+ if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
+ return -EINVAL;
/*
* Need to use vmalloc() if we want to support large #s of entries.
@@ -425,10 +369,8 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
else
sz += sizeof(struct ib_wc) * (cqe + 1);
wc = vmalloc_user(sz);
- if (!wc) {
- ret = -ENOMEM;
- goto bail;
- }
+ if (!wc)
+ return -ENOMEM;
/* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
@@ -446,11 +388,11 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
*/
old_wc = cq->queue;
head = old_wc->head;
- if (head > (u32) cq->ibcq.cqe)
- head = (u32) cq->ibcq.cqe;
+ if (head > (u32)cq->ibcq.cqe)
+ head = (u32)cq->ibcq.cqe;
tail = old_wc->tail;
- if (tail > (u32) cq->ibcq.cqe)
- tail = (u32) cq->ibcq.cqe;
+ if (tail > (u32)cq->ibcq.cqe)
+ tail = (u32)cq->ibcq.cqe;
if (head < tail)
n = cq->ibcq.cqe + 1 + head - tail;
else
@@ -464,7 +406,7 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
wc->uqueue[n] = old_wc->uqueue[tail];
else
wc->kqueue[n] = old_wc->kqueue[tail];
- if (tail == (u32) cq->ibcq.cqe)
+ if (tail == (u32)cq->ibcq.cqe)
tail = 0;
else
tail++;
@@ -478,80 +420,131 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
vfree(old_wc);
if (cq->ip) {
- struct hfi1_ibdev *dev = to_idev(ibcq->device);
- struct hfi1_mmap_info *ip = cq->ip;
+ struct rvt_mmap_info *ip = cq->ip;
- hfi1_update_mmap_info(dev, ip, sz, wc);
+ rvt_update_mmap_info(rdi, ip, sz, wc);
/*
* Return the offset to mmap.
- * See hfi1_mmap() for details.
+ * See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
ret = ib_copy_to_udata(udata, &ip->offset,
sizeof(ip->offset));
if (ret)
- goto bail;
+ return ret;
}
- spin_lock_irq(&dev->pending_lock);
+ spin_lock_irq(&rdi->pending_lock);
if (list_empty(&ip->pending_mmaps))
- list_add(&ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
+ list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
+ spin_unlock_irq(&rdi->pending_lock);
}
- ret = 0;
- goto bail;
+ return 0;
bail_unlock:
spin_unlock_irq(&cq->lock);
bail_free:
vfree(wc);
-bail:
return ret;
}
-int hfi1_cq_init(struct hfi1_devdata *dd)
+/**
+ * rvt_poll_cq - poll for work completion entries
+ * @ibcq: the completion queue to poll
+ * @num_entries: the maximum number of entries to return
+ * @entry: pointer to array where work completions are placed
+ *
+ * This may be called from interrupt context. Also called by ib_poll_cq()
+ * in the generic verbs code.
+ *
+ * Return: the number of completion entries polled.
+ */
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+ struct rvt_cq_wc *wc;
+ unsigned long flags;
+ int npolled;
+ u32 tail;
+
+ /* The kernel can only poll a kernel completion queue */
+ if (cq->ip)
+ return -EINVAL;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32)cq->ibcq.cqe)
+ tail = (u32)cq->ibcq.cqe;
+ for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+ if (tail == wc->head)
+ break;
+ /* The kernel doesn't need a RMB since it has the lock. */
+ *entry = wc->kqueue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
+ else
+ tail++;
+ }
+ wc->tail = tail;
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return npolled;
+}
+
+/**
+ * rvt_driver_cq_init - Init cq resources on behalf of driver
+ * @rdi: rvt dev structure
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
int ret = 0;
int cpu;
struct task_struct *task;
- if (dd->worker)
+ if (rdi->worker)
return 0;
- dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
- if (!dd->worker)
+ rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
+ if (!rdi->worker)
return -ENOMEM;
- init_kthread_worker(dd->worker);
+ init_kthread_worker(rdi->worker);
task = kthread_create_on_node(
kthread_worker_fn,
- dd->worker,
- dd->assigned_node_id,
- "hfi1_cq%d", dd->unit);
- if (IS_ERR(task))
- goto task_fail;
- cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+ rdi->worker,
+ rdi->dparms.node,
+ "%s", rdi->dparms.cq_name);
+ if (IS_ERR(task)) {
+ kfree(rdi->worker);
+ rdi->worker = NULL;
+ return PTR_ERR(task);
+ }
+
+ cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
kthread_bind(task, cpu);
wake_up_process(task);
-out:
return ret;
-task_fail:
- ret = PTR_ERR(task);
- kfree(dd->worker);
- dd->worker = NULL;
- goto out;
}
-void hfi1_cq_exit(struct hfi1_devdata *dd)
+/**
+ * rvt_cq_exit - tear down cq reources
+ * @rdi: rvt dev structure
+ */
+void rvt_cq_exit(struct rvt_dev_info *rdi)
{
struct kthread_worker *worker;
- worker = dd->worker;
+ worker = rdi->worker;
if (!worker)
return;
/* blocks future queuing from send_complete() */
- dd->worker = NULL;
- smp_wmb(); /* See hfi1_cq_enter */
+ rdi->worker = NULL;
+ smp_wmb(); /* See rdi_cq_enter */
flush_kthread_worker(worker);
kthread_stop(worker->task);
kfree(worker);
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
new file mode 100644
index 000000000000..6182c29eff66
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -0,0 +1,64 @@
+#ifndef DEF_RVTCQ_H
+#define DEF_RVTCQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
+
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq);
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+int rvt_driver_cq_init(struct rvt_dev_info *rdi);
+void rvt_cq_exit(struct rvt_dev_info *rdi);
+#endif /* DEF_RVTCQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
new file mode 100644
index 000000000000..33076a5eee2f
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/dma.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+
+#include "dma.h"
+
+#define BAD_DMA_ADDRESS ((u64)0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
+ size_t size, enum dma_data_direction direction)
+{
+ if (WARN_ON(!valid_dma_direction(direction)))
+ return BAD_DMA_ADDRESS;
+
+ return (u64)cpu_addr;
+}
+
+static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* This is a stub, nothing to be done here */
+}
+
+static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ u64 addr;
+
+ if (WARN_ON(!valid_dma_direction(direction)))
+ return BAD_DMA_ADDRESS;
+
+ if (offset + size > PAGE_SIZE)
+ return BAD_DMA_ADDRESS;
+
+ addr = (u64)page_address(page);
+ if (addr)
+ addr += offset;
+
+ return addr;
+}
+
+static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* This is a stub, nothing to be done here */
+}
+
+static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ u64 addr;
+ int i;
+ int ret = nents;
+
+ if (WARN_ON(!valid_dma_direction(direction)))
+ return 0;
+
+ for_each_sg(sgl, sg, nents, i) {
+ addr = (u64)page_address(sg_page(sg));
+ if (!addr) {
+ ret = 0;
+ break;
+ }
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
+ }
+ return ret;
+}
+
+static void rvt_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ /* This is a stub, nothing to be done here */
+}
+
+static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+ size_t size, enum dma_data_direction dir)
+{
+}
+
+static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p)
+ addr = page_address(p);
+ if (dma_handle)
+ *dma_handle = (u64)addr;
+ return addr;
+}
+
+static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
+{
+ free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
+ .mapping_error = rvt_mapping_error,
+ .map_single = rvt_dma_map_single,
+ .unmap_single = rvt_dma_unmap_single,
+ .map_page = rvt_dma_map_page,
+ .unmap_page = rvt_dma_unmap_page,
+ .map_sg = rvt_map_sg,
+ .unmap_sg = rvt_unmap_sg,
+ .sync_single_for_cpu = rvt_sync_single_for_cpu,
+ .sync_single_for_device = rvt_sync_single_for_device,
+ .alloc_coherent = rvt_dma_alloc_coherent,
+ .free_coherent = rvt_dma_free_coherent
+};
diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h
new file mode 100644
index 000000000000..979f07e09195
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/dma.h
@@ -0,0 +1,53 @@
+#ifndef DEF_RDMAVTDMA_H
+#define DEF_RDMAVTDMA_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
+
+#endif /* DEF_RDMAVTDMA_H */
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
new file mode 100644
index 000000000000..f6e99778d7ca
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/ib_mad.h>
+#include "mad.h"
+#include "vt.h"
+
+/**
+ * rvt_process_mad - process an incoming MAD packet
+ * @ibdev: the infiniband device this packet came in on
+ * @mad_flags: MAD flags
+ * @port_num: the port number this packet came in on, 1 based from ib core
+ * @in_wc: the work completion entry for this packet
+ * @in_grh: the global route header for this packet
+ * @in_mad: the incoming MAD
+ * @out_mad: any outgoing MAD reply
+ *
+ * Note that the verbs framework has already done the MAD sanity checks,
+ * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ * MADs.
+ *
+ * This is called by the ib_mad module.
+ *
+ * Return: IB_MAD_RESULT_SUCCESS or error
+ */
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in, size_t in_mad_size,
+ struct ib_mad_hdr *out, size_t *out_mad_size,
+ u16 *out_mad_pkey_index)
+{
+ /*
+ * MAD processing is quite different between hfi1 and qib. Therfore this
+ * is expected to be provided by the driver. Other drivers in the future
+ * may chose to implement this but it should not be made into a
+ * requirement.
+ */
+ if (ibport_num_to_idx(ibdev, port_num) < 0)
+ return -EINVAL;
+
+ return IB_MAD_RESULT_FAILURE;
+}
+
+static void rvt_send_mad_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+/**
+ * rvt_create_mad_agents - create mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs to be notified of mad agent creation then call back
+ *
+ * Return 0 on success
+ */
+int rvt_create_mad_agents(struct rvt_dev_info *rdi)
+{
+ struct ib_mad_agent *agent;
+ struct rvt_ibport *rvp;
+ int p;
+ int ret;
+
+ for (p = 0; p < rdi->dparms.nports; p++) {
+ rvp = rdi->ports[p];
+ agent = ib_register_mad_agent(&rdi->ibdev, p + 1,
+ IB_QPT_SMI,
+ NULL, 0, rvt_send_mad_handler,
+ NULL, NULL, 0);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto err;
+ }
+
+ rvp->send_agent = agent;
+
+ if (rdi->driver_f.notify_create_mad_agent)
+ rdi->driver_f.notify_create_mad_agent(rdi, p);
+ }
+
+ return 0;
+
+err:
+ for (p = 0; p < rdi->dparms.nports; p++) {
+ rvp = rdi->ports[p];
+ if (rvp->send_agent) {
+ agent = rvp->send_agent;
+ rvp->send_agent = NULL;
+ ib_unregister_mad_agent(agent);
+ if (rdi->driver_f.notify_free_mad_agent)
+ rdi->driver_f.notify_free_mad_agent(rdi, p);
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * rvt_free_mad_agents - free up mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs notification of mad agent removal make the call back
+ */
+void rvt_free_mad_agents(struct rvt_dev_info *rdi)
+{
+ struct ib_mad_agent *agent;
+ struct rvt_ibport *rvp;
+ int p;
+
+ for (p = 0; p < rdi->dparms.nports; p++) {
+ rvp = rdi->ports[p];
+ if (rvp->send_agent) {
+ agent = rvp->send_agent;
+ rvp->send_agent = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+ if (rvp->sm_ah) {
+ ib_destroy_ah(&rvp->sm_ah->ibah);
+ rvp->sm_ah = NULL;
+ }
+
+ if (rdi->driver_f.notify_free_mad_agent)
+ rdi->driver_f.notify_free_mad_agent(rdi, p);
+ }
+}
+
diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h
new file mode 100644
index 000000000000..a9d6eecc3723
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mad.h
@@ -0,0 +1,60 @@
+#ifndef DEF_RVTMAD_H
+#define DEF_RVTMAD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in, size_t in_mad_size,
+ struct ib_mad_hdr *out, size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
+int rvt_create_mad_agents(struct rvt_dev_info *rdi);
+void rvt_free_mad_agents(struct rvt_dev_info *rdi);
+#endif /* DEF_RVTMAD_H */
diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index afc6b4c61a1d..983d319ac976 100644
--- a/drivers/staging/rdma/hfi1/verbs_mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -48,17 +45,36 @@
*
*/
+#include <linux/slab.h>
+#include <linux/sched.h>
#include <linux/rculist.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
-#include "hfi.h"
+#include "mcast.h"
+
+/**
+ * rvt_driver_mcast - init resources for multicast
+ * @rdi: rvt dev struct
+ *
+ * This is per device that registers with rdmavt
+ */
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi)
+{
+ /*
+ * Anything that needs setup for multicast on a per driver or per rdi
+ * basis should be done in here.
+ */
+ spin_lock_init(&rdi->n_mcast_grps_lock);
+}
/**
* mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
* @qp: the QP to link
*/
-static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp)
+static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
{
- struct hfi1_mcast_qp *mqp;
+ struct rvt_mcast_qp *mqp;
mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
if (!mqp)
@@ -71,9 +87,9 @@ bail:
return mqp;
}
-static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
+static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
{
- struct hfi1_qp *qp = mqp->qp;
+ struct rvt_qp *qp = mqp->qp;
/* Notify hfi1_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
@@ -88,11 +104,11 @@ static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
*
* A list of QPs will be attached to this structure.
*/
-static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
{
- struct hfi1_mcast *mcast;
+ struct rvt_mcast *mcast;
- mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
+ mcast = kzalloc(sizeof(*mcast), GFP_KERNEL);
if (!mcast)
goto bail;
@@ -100,75 +116,72 @@ static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
- mcast->n_attached = 0;
bail:
return mcast;
}
-static void mcast_free(struct hfi1_mcast *mcast)
+static void rvt_mcast_free(struct rvt_mcast *mcast)
{
- struct hfi1_mcast_qp *p, *tmp;
+ struct rvt_mcast_qp *p, *tmp;
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
- mcast_qp_free(p);
+ rvt_mcast_qp_free(p);
kfree(mcast);
}
/**
- * hfi1_mcast_find - search the global table for the given multicast GID
+ * rvt_mcast_find - search the global table for the given multicast GID
* @ibp: the IB port structure
* @mgid: the multicast GID to search for
*
- * Returns NULL if not found.
- *
* The caller is responsible for decrementing the reference count if found.
+ *
+ * Return: NULL if not found.
*/
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid)
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
{
struct rb_node *n;
unsigned long flags;
- struct hfi1_mcast *mcast;
+ struct rvt_mcast *found = NULL;
spin_lock_irqsave(&ibp->lock, flags);
n = ibp->mcast_tree.rb_node;
while (n) {
int ret;
+ struct rvt_mcast *mcast;
- mcast = rb_entry(n, struct hfi1_mcast, rb_node);
+ mcast = rb_entry(n, struct rvt_mcast, rb_node);
ret = memcmp(mgid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
- if (ret < 0)
+ if (ret < 0) {
n = n->rb_left;
- else if (ret > 0)
+ } else if (ret > 0) {
n = n->rb_right;
- else {
+ } else {
atomic_inc(&mcast->refcount);
- spin_unlock_irqrestore(&ibp->lock, flags);
- goto bail;
+ found = mcast;
+ break;
}
}
spin_unlock_irqrestore(&ibp->lock, flags);
-
- mcast = NULL;
-
-bail:
- return mcast;
+ return found;
}
+EXPORT_SYMBOL(rvt_mcast_find);
/**
* mcast_add - insert mcast GID into table and attach QP struct
* @mcast: the mcast GID table
* @mqp: the QP to attach
*
- * Return zero if both were added. Return EEXIST if the GID was already in
+ * Return: zero if both were added. Return EEXIST if the GID was already in
* the table but the QP was added. Return ESRCH if the QP was already
* attached and neither structure was added.
*/
-static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
- struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp)
+static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
+ struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
{
struct rb_node **n = &ibp->mcast_tree.rb_node;
struct rb_node *pn = NULL;
@@ -177,11 +190,11 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
spin_lock_irq(&ibp->lock);
while (*n) {
- struct hfi1_mcast *tmcast;
- struct hfi1_mcast_qp *p;
+ struct rvt_mcast *tmcast;
+ struct rvt_mcast_qp *p;
pn = *n;
- tmcast = rb_entry(pn, struct hfi1_mcast, rb_node);
+ tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
sizeof(union ib_gid));
@@ -201,7 +214,8 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
goto bail;
}
}
- if (tmcast->n_attached == hfi1_max_mcast_qp_attached) {
+ if (tmcast->n_attached ==
+ rdi->dparms.props.max_mcast_qp_attach) {
ret = ENOMEM;
goto bail;
}
@@ -213,15 +227,15 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
goto bail;
}
- spin_lock(&dev->n_mcast_grps_lock);
- if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) {
- spin_unlock(&dev->n_mcast_grps_lock);
+ spin_lock(&rdi->n_mcast_grps_lock);
+ if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) {
+ spin_unlock(&rdi->n_mcast_grps_lock);
ret = ENOMEM;
goto bail;
}
- dev->n_mcast_grps_allocated++;
- spin_unlock(&dev->n_mcast_grps_lock);
+ rdi->n_mcast_grps_allocated++;
+ spin_unlock(&rdi->n_mcast_grps_lock);
mcast->n_attached++;
@@ -239,92 +253,98 @@ bail:
return ret;
}
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+/**
+ * rvt_attach_mcast - attach a qp to a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
- struct hfi1_qp *qp = to_iqp(ibqp);
- struct hfi1_ibdev *dev = to_idev(ibqp->device);
- struct hfi1_ibport *ibp;
- struct hfi1_mcast *mcast;
- struct hfi1_mcast_qp *mqp;
- int ret;
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+ struct rvt_mcast *mcast;
+ struct rvt_mcast_qp *mqp;
+ int ret = -ENOMEM;
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+ return -EINVAL;
/*
* Allocate data structures since its better to do this outside of
* spin locks and it will most likely be needed.
*/
- mcast = mcast_alloc(gid);
- if (mcast == NULL) {
- ret = -ENOMEM;
- goto bail;
- }
- mqp = mcast_qp_alloc(qp);
- if (mqp == NULL) {
- mcast_free(mcast);
- ret = -ENOMEM;
- goto bail;
- }
- ibp = to_iport(ibqp->device, qp->port_num);
- switch (mcast_add(dev, ibp, mcast, mqp)) {
- case ESRCH:
- /* Neither was used: OK to attach the same QP twice. */
- mcast_qp_free(mqp);
- mcast_free(mcast);
- break;
+ mcast = rvt_mcast_alloc(gid);
+ if (!mcast)
+ return -ENOMEM;
- case EEXIST: /* The mcast wasn't used */
- mcast_free(mcast);
- break;
+ mqp = rvt_mcast_qp_alloc(qp);
+ if (!mqp)
+ goto bail_mcast;
+ switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) {
+ case ESRCH:
+ /* Neither was used: OK to attach the same QP twice. */
+ ret = 0;
+ goto bail_mqp;
+ case EEXIST: /* The mcast wasn't used */
+ ret = 0;
+ goto bail_mcast;
case ENOMEM:
/* Exceeded the maximum number of mcast groups. */
- mcast_qp_free(mqp);
- mcast_free(mcast);
ret = -ENOMEM;
- goto bail;
-
+ goto bail_mqp;
default:
break;
}
- ret = 0;
+ return 0;
+
+bail_mqp:
+ rvt_mcast_qp_free(mqp);
+
+bail_mcast:
+ rvt_mcast_free(mcast);
-bail:
return ret;
}
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+/**
+ * rvt_detach_mcast - remove a qp from a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
- struct hfi1_qp *qp = to_iqp(ibqp);
- struct hfi1_ibdev *dev = to_idev(ibqp->device);
- struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num);
- struct hfi1_mcast *mcast = NULL;
- struct hfi1_mcast_qp *p, *tmp;
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+ struct rvt_mcast *mcast = NULL;
+ struct rvt_mcast_qp *p, *tmp, *delp = NULL;
struct rb_node *n;
int last = 0;
- int ret;
+ int ret = 0;
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+ return -EINVAL;
spin_lock_irq(&ibp->lock);
/* Find the GID in the mcast table. */
n = ibp->mcast_tree.rb_node;
while (1) {
- if (n == NULL) {
+ if (!n) {
spin_unlock_irq(&ibp->lock);
- ret = -EINVAL;
- goto bail;
+ return -EINVAL;
}
- mcast = rb_entry(n, struct hfi1_mcast, rb_node);
+ mcast = rb_entry(n, struct rvt_mcast, rb_node);
ret = memcmp(gid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
@@ -345,6 +365,7 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
*/
list_del_rcu(&p->list);
mcast->n_attached--;
+ delp = p;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
@@ -355,31 +376,42 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
spin_unlock_irq(&ibp->lock);
+ /* QP not attached */
+ if (!delp)
+ return -EINVAL;
+
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ rvt_mcast_qp_free(delp);
- if (p) {
- /*
- * Wait for any list walkers to finish before freeing the
- * list element.
- */
- wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
- mcast_qp_free(p);
- }
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
- mcast_free(mcast);
- spin_lock_irq(&dev->n_mcast_grps_lock);
- dev->n_mcast_grps_allocated--;
- spin_unlock_irq(&dev->n_mcast_grps_lock);
+ rvt_mcast_free(mcast);
+ spin_lock_irq(&rdi->n_mcast_grps_lock);
+ rdi->n_mcast_grps_allocated--;
+ spin_unlock_irq(&rdi->n_mcast_grps_lock);
}
- ret = 0;
-
-bail:
- return ret;
+ return 0;
}
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp)
+/**
+ *rvt_mast_tree_empty - determine if any qps are attached to any mcast group
+ *@rdi: rvt dev struct
+ *
+ * Return: in use count
+ */
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi)
{
- return ibp->mcast_tree.rb_node == NULL;
+ int i;
+ int in_use = 0;
+
+ for (i = 0; i < rdi->dparms.nports; i++)
+ if (rdi->ports[i]->mcast_tree.rb_node)
+ in_use++;
+ return in_use;
}
diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h
new file mode 100644
index 000000000000..29f579267608
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mcast.h
@@ -0,0 +1,58 @@
+#ifndef DEF_RVTMCAST_H
+#define DEF_RVTMCAST_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi);
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi);
+
+#endif /* DEF_RVTMCAST_H */
diff --git a/drivers/staging/rdma/hfi1/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index 5173b1c60b3d..e202b8142759 100644
--- a/drivers/staging/rdma/hfi1/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -48,68 +45,74 @@
*
*/
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/errno.h>
#include <asm/pgtable.h>
+#include "mmap.h"
-#include "verbs.h"
+/**
+ * rvt_mmap_init - init link list and lock for mem map
+ * @rdi: rvt dev struct
+ */
+void rvt_mmap_init(struct rvt_dev_info *rdi)
+{
+ INIT_LIST_HEAD(&rdi->pending_mmaps);
+ spin_lock_init(&rdi->pending_lock);
+ rdi->mmap_offset = PAGE_SIZE;
+ spin_lock_init(&rdi->mmap_offset_lock);
+}
/**
- * hfi1_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct hfi1_mmap_info
+ * rvt_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct rvt_mmap_info
*/
-void hfi1_release_mmap_info(struct kref *ref)
+void rvt_release_mmap_info(struct kref *ref)
{
- struct hfi1_mmap_info *ip =
- container_of(ref, struct hfi1_mmap_info, ref);
- struct hfi1_ibdev *dev = to_idev(ip->context->device);
+ struct rvt_mmap_info *ip =
+ container_of(ref, struct rvt_mmap_info, ref);
+ struct rvt_dev_info *rdi = ib_to_rvt(ip->context->device);
- spin_lock_irq(&dev->pending_lock);
+ spin_lock_irq(&rdi->pending_lock);
list_del(&ip->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
+ spin_unlock_irq(&rdi->pending_lock);
vfree(ip->obj);
kfree(ip);
}
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void hfi1_vma_open(struct vm_area_struct *vma)
+static void rvt_vma_open(struct vm_area_struct *vma)
{
- struct hfi1_mmap_info *ip = vma->vm_private_data;
+ struct rvt_mmap_info *ip = vma->vm_private_data;
kref_get(&ip->ref);
}
-static void hfi1_vma_close(struct vm_area_struct *vma)
+static void rvt_vma_close(struct vm_area_struct *vma)
{
- struct hfi1_mmap_info *ip = vma->vm_private_data;
+ struct rvt_mmap_info *ip = vma->vm_private_data;
- kref_put(&ip->ref, hfi1_release_mmap_info);
+ kref_put(&ip->ref, rvt_release_mmap_info);
}
-static struct vm_operations_struct hfi1_vm_ops = {
- .open = hfi1_vma_open,
- .close = hfi1_vma_close,
+static const struct vm_operations_struct rvt_vm_ops = {
+ .open = rvt_vma_open,
+ .close = rvt_vma_close,
};
/**
- * hfi1_mmap - create a new mmap region
+ * rvt_mmap - create a new mmap region
* @context: the IB user context of the process making the mmap() call
* @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
+ *
+ * Return: zero if the mmap is OK. Otherwise, return an errno.
*/
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
- struct hfi1_ibdev *dev = to_idev(context->device);
+ struct rvt_dev_info *rdi = ib_to_rvt(context->device);
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
- struct hfi1_mmap_info *ip, *pp;
+ struct rvt_mmap_info *ip, *pp;
int ret = -EINVAL;
/*
@@ -117,53 +120,60 @@ int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
* Normally, this list is very short since a call to create a
* CQ, QP, or SRQ is soon followed by a call to mmap().
*/
- spin_lock_irq(&dev->pending_lock);
- list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+ spin_lock_irq(&rdi->pending_lock);
+ list_for_each_entry_safe(ip, pp, &rdi->pending_mmaps,
pending_mmaps) {
/* Only the creator is allowed to mmap the object */
- if (context != ip->context || (__u64) offset != ip->offset)
+ if (context != ip->context || (__u64)offset != ip->offset)
continue;
/* Don't allow a mmap larger than the object. */
if (size > ip->size)
break;
list_del_init(&ip->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
+ spin_unlock_irq(&rdi->pending_lock);
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret)
goto done;
- vma->vm_ops = &hfi1_vm_ops;
+ vma->vm_ops = &rvt_vm_ops;
vma->vm_private_data = ip;
- hfi1_vma_open(vma);
+ rvt_vma_open(vma);
goto done;
}
- spin_unlock_irq(&dev->pending_lock);
+ spin_unlock_irq(&rdi->pending_lock);
done:
return ret;
}
-/*
- * Allocate information for hfi1_mmap
+/**
+ * rvt_create_mmap_info - allocate information for hfi1_mmap
+ * @rdi: rvt dev struct
+ * @size: size in bytes to map
+ * @context: user context
+ * @obj: opaque pointer to a cq, wq etc
+ *
+ * Return: rvt_mmap struct on success
*/
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
- u32 size,
- struct ib_ucontext *context,
- void *obj) {
- struct hfi1_mmap_info *ip;
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj)
+{
+ struct rvt_mmap_info *ip;
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node);
if (!ip)
- goto bail;
+ return ip;
size = PAGE_ALIGN(size);
- spin_lock_irq(&dev->mmap_offset_lock);
- if (dev->mmap_offset == 0)
- dev->mmap_offset = PAGE_SIZE;
- ip->offset = dev->mmap_offset;
- dev->mmap_offset += size;
- spin_unlock_irq(&dev->mmap_offset_lock);
+ spin_lock_irq(&rdi->mmap_offset_lock);
+ if (rdi->mmap_offset == 0)
+ rdi->mmap_offset = PAGE_SIZE;
+ ip->offset = rdi->mmap_offset;
+ rdi->mmap_offset += size;
+ spin_unlock_irq(&rdi->mmap_offset_lock);
INIT_LIST_HEAD(&ip->pending_mmaps);
ip->size = size;
@@ -171,21 +181,27 @@ struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
ip->obj = obj;
kref_init(&ip->ref);
-bail:
return ip;
}
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
- u32 size, void *obj)
+/**
+ * rvt_update_mmap_info - update a mem map
+ * @rdi: rvt dev struct
+ * @ip: mmap info pointer
+ * @size: size to grow by
+ * @obj: opaque pointer to cq, wq, etc.
+ */
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+ u32 size, void *obj)
{
size = PAGE_ALIGN(size);
- spin_lock_irq(&dev->mmap_offset_lock);
- if (dev->mmap_offset == 0)
- dev->mmap_offset = PAGE_SIZE;
- ip->offset = dev->mmap_offset;
- dev->mmap_offset += size;
- spin_unlock_irq(&dev->mmap_offset_lock);
+ spin_lock_irq(&rdi->mmap_offset_lock);
+ if (rdi->mmap_offset == 0)
+ rdi->mmap_offset = PAGE_SIZE;
+ ip->offset = rdi->mmap_offset;
+ rdi->mmap_offset += size;
+ spin_unlock_irq(&rdi->mmap_offset_lock);
ip->size = size;
ip->obj = obj;
diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h
new file mode 100644
index 000000000000..fab0e7b1daf9
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mmap.h
@@ -0,0 +1,63 @@
+#ifndef DEF_RDMAVTMMAP_H
+#define DEF_RDMAVTMMAP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_mmap_init(struct rvt_dev_info *rdi);
+void rvt_release_mmap_info(struct kref *ref);
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+ u32 size, void *obj);
+
+#endif /* DEF_RDMAVTMMAP_H */
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
new file mode 100644
index 000000000000..0ff765bfd619
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+#include <rdma/rdma_vt.h>
+#include "vt.h"
+#include "mr.h"
+
+/**
+ * rvt_driver_mr_init - Init MR resources per driver
+ * @rdi: rvt dev struct
+ *
+ * Do any intilization needed when a driver registers with rdmavt.
+ *
+ * Return: 0 on success or errno on failure
+ */
+int rvt_driver_mr_init(struct rvt_dev_info *rdi)
+{
+ unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
+ unsigned lk_tab_size;
+ int i;
+
+ /*
+ * The top hfi1_lkey_table_size bits are used to index the
+ * table. The lower 8 bits can be owned by the user (copied from
+ * the LKEY). The remaining bits act as a generation number or tag.
+ */
+ if (!lkey_table_size)
+ return -EINVAL;
+
+ spin_lock_init(&rdi->lkey_table.lock);
+
+ /* ensure generation is at least 4 bits */
+ if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
+ rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
+ lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
+ rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
+ lkey_table_size = rdi->dparms.lkey_table_size;
+ }
+ rdi->lkey_table.max = 1 << lkey_table_size;
+ lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
+ rdi->lkey_table.table = (struct rvt_mregion __rcu **)
+ vmalloc_node(lk_tab_size, rdi->dparms.node);
+ if (!rdi->lkey_table.table)
+ return -ENOMEM;
+
+ RCU_INIT_POINTER(rdi->dma_mr, NULL);
+ for (i = 0; i < rdi->lkey_table.max; i++)
+ RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
+
+ return 0;
+}
+
+/**
+ *rvt_mr_exit: clean up MR
+ *@rdi: rvt dev structure
+ *
+ * called when drivers have unregistered or perhaps failed to register with us
+ */
+void rvt_mr_exit(struct rvt_dev_info *rdi)
+{
+ if (rdi->dma_mr)
+ rvt_pr_err(rdi, "DMA MR not null!\n");
+
+ vfree(rdi->lkey_table.table);
+}
+
+static void rvt_deinit_mregion(struct rvt_mregion *mr)
+{
+ int i = mr->mapsz;
+
+ mr->mapsz = 0;
+ while (i)
+ kfree(mr->map[--i]);
+}
+
+static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
+ int count)
+{
+ int m, i = 0;
+
+ mr->mapsz = 0;
+ m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
+ for (; i < m; i++) {
+ mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
+ if (!mr->map[i]) {
+ rvt_deinit_mregion(mr);
+ return -ENOMEM;
+ }
+ mr->mapsz++;
+ }
+ init_completion(&mr->comp);
+ /* count returning the ptr to user */
+ atomic_set(&mr->refcount, 1);
+ mr->pd = pd;
+ mr->max_segs = count;
+ return 0;
+}
+
+/**
+ * rvt_alloc_lkey - allocate an lkey
+ * @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count as required.
+ *
+ * Sets the lkey field mr for non-dma regions.
+ *
+ */
+static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
+{
+ unsigned long flags;
+ u32 r;
+ u32 n;
+ int ret = 0;
+ struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
+ struct rvt_lkey_table *rkt = &dev->lkey_table;
+
+ rvt_get_mr(mr);
+ spin_lock_irqsave(&rkt->lock, flags);
+
+ /* special case for dma_mr lkey == 0 */
+ if (dma_region) {
+ struct rvt_mregion *tmr;
+
+ tmr = rcu_access_pointer(dev->dma_mr);
+ if (!tmr) {
+ rcu_assign_pointer(dev->dma_mr, mr);
+ mr->lkey_published = 1;
+ } else {
+ rvt_put_mr(mr);
+ }
+ goto success;
+ }
+
+ /* Find the next available LKEY */
+ r = rkt->next;
+ n = r;
+ for (;;) {
+ if (!rcu_access_pointer(rkt->table[r]))
+ break;
+ r = (r + 1) & (rkt->max - 1);
+ if (r == n)
+ goto bail;
+ }
+ rkt->next = (r + 1) & (rkt->max - 1);
+ /*
+ * Make sure lkey is never zero which is reserved to indicate an
+ * unrestricted LKEY.
+ */
+ rkt->gen++;
+ /*
+ * bits are capped to ensure enough bits for generation number
+ */
+ mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
+ ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
+ << 8);
+ if (mr->lkey == 0) {
+ mr->lkey |= 1 << 8;
+ rkt->gen++;
+ }
+ rcu_assign_pointer(rkt->table[r], mr);
+ mr->lkey_published = 1;
+success:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+out:
+ return ret;
+bail:
+ rvt_put_mr(mr);
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = -ENOMEM;
+ goto out;
+}
+
+/**
+ * rvt_free_lkey - free an lkey
+ * @mr: mr to free from tables
+ */
+static void rvt_free_lkey(struct rvt_mregion *mr)
+{
+ unsigned long flags;
+ u32 lkey = mr->lkey;
+ u32 r;
+ struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
+ struct rvt_lkey_table *rkt = &dev->lkey_table;
+ int freed = 0;
+
+ spin_lock_irqsave(&rkt->lock, flags);
+ if (!mr->lkey_published)
+ goto out;
+ if (lkey == 0) {
+ RCU_INIT_POINTER(dev->dma_mr, NULL);
+ } else {
+ r = lkey >> (32 - dev->dparms.lkey_table_size);
+ RCU_INIT_POINTER(rkt->table[r], NULL);
+ }
+ mr->lkey_published = 0;
+ freed++;
+out:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ if (freed) {
+ synchronize_rcu();
+ rvt_put_mr(mr);
+ }
+}
+
+static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
+{
+ struct rvt_mr *mr;
+ int rval = -ENOMEM;
+ int m;
+
+ /* Allocate struct plus pointers to first level page tables. */
+ m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
+ mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
+ if (!mr)
+ goto bail;
+
+ rval = rvt_init_mregion(&mr->mr, pd, count);
+ if (rval)
+ goto bail;
+ /*
+ * ib_reg_phys_mr() will initialize mr->ibmr except for
+ * lkey and rkey.
+ */
+ rval = rvt_alloc_lkey(&mr->mr, 0);
+ if (rval)
+ goto bail_mregion;
+ mr->ibmr.lkey = mr->mr.lkey;
+ mr->ibmr.rkey = mr->mr.lkey;
+done:
+ return mr;
+
+bail_mregion:
+ rvt_deinit_mregion(&mr->mr);
+bail:
+ kfree(mr);
+ mr = ERR_PTR(rval);
+ goto done;
+}
+
+static void __rvt_free_mr(struct rvt_mr *mr)
+{
+ rvt_deinit_mregion(&mr->mr);
+ rvt_free_lkey(&mr->mr);
+ vfree(mr);
+}
+
+/**
+ * rvt_get_dma_mr - get a DMA memory region
+ * @pd: protection domain for this memory region
+ * @acc: access flags
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see dma.c).
+ */
+struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct rvt_mr *mr;
+ struct ib_mr *ret;
+ int rval;
+
+ if (ibpd_to_rvtpd(pd)->user)
+ return ERR_PTR(-EPERM);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ rval = rvt_init_mregion(&mr->mr, pd, 0);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail;
+ }
+
+ rval = rvt_alloc_lkey(&mr->mr, 1);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail_mregion;
+ }
+
+ mr->mr.access_flags = acc;
+ ret = &mr->ibmr;
+done:
+ return ret;
+
+bail_mregion:
+ rvt_deinit_mregion(&mr->mr);
+bail:
+ kfree(mr);
+ goto done;
+}
+
+/**
+ * rvt_reg_user_mr - register a userspace memory region
+ * @pd: protection domain for this memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @mr_access_flags: access flags for this memory region
+ * @udata: unused by the driver
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct rvt_mr *mr;
+ struct ib_umem *umem;
+ struct scatterlist *sg;
+ int n, m, entry;
+ struct ib_mr *ret;
+
+ if (length == 0)
+ return ERR_PTR(-EINVAL);
+
+ umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(umem))
+ return (void *)umem;
+
+ n = umem->nmap;
+
+ mr = __rvt_alloc_mr(n, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
+ goto bail_umem;
+ }
+
+ mr->mr.user_base = start;
+ mr->mr.iova = virt_addr;
+ mr->mr.length = length;
+ mr->mr.offset = ib_umem_offset(umem);
+ mr->mr.access_flags = mr_access_flags;
+ mr->umem = umem;
+
+ if (is_power_of_2(umem->page_size))
+ mr->mr.page_shift = ilog2(umem->page_size);
+ m = 0;
+ n = 0;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail_inval;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == RVT_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ return &mr->ibmr;
+
+bail_inval:
+ __rvt_free_mr(mr);
+
+bail_umem:
+ ib_umem_release(umem);
+
+ return ret;
+}
+
+/**
+ * rvt_dereg_mr - unregister and free a memory region
+ * @ibmr: the memory region to free
+ *
+ *
+ * Note that this is called to free MRs created by rvt_get_dma_mr()
+ * or rvt_reg_user_mr().
+ *
+ * Returns 0 on success.
+ */
+int rvt_dereg_mr(struct ib_mr *ibmr)
+{
+ struct rvt_mr *mr = to_imr(ibmr);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
+ int ret = 0;
+ unsigned long timeout;
+
+ rvt_free_lkey(&mr->mr);
+
+ rvt_put_mr(&mr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
+ if (!timeout) {
+ rvt_pr_err(rdi,
+ "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
+ mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+ rvt_get_mr(&mr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ rvt_deinit_mregion(&mr->mr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+ kfree(mr);
+out:
+ return ret;
+}
+
+/**
+ * rvt_alloc_mr - Allocate a memory region usable with the
+ * @pd: protection domain for this memory region
+ * @mr_type: mem region type
+ * @max_num_sg: Max number of segments allowed
+ *
+ * Return: the memory region on success, otherwise return an errno.
+ */
+struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct rvt_mr *mr;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ mr = __rvt_alloc_mr(max_num_sg, pd);
+ if (IS_ERR(mr))
+ return (struct ib_mr *)mr;
+
+ return &mr->ibmr;
+}
+
+/**
+ * rvt_alloc_fmr - allocate a fast memory region
+ * @pd: the protection domain for this memory region
+ * @mr_access_flags: access flags for this memory region
+ * @fmr_attr: fast memory region attributes
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ */
+struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct rvt_fmr *fmr;
+ int m;
+ struct ib_fmr *ret;
+ int rval = -ENOMEM;
+
+ /* Allocate struct plus pointers to first level page tables. */
+ m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
+ fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
+ if (!fmr)
+ goto bail;
+
+ rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ if (rval)
+ goto bail;
+
+ /*
+ * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
+ * rkey.
+ */
+ rval = rvt_alloc_lkey(&fmr->mr, 0);
+ if (rval)
+ goto bail_mregion;
+ fmr->ibfmr.rkey = fmr->mr.lkey;
+ fmr->ibfmr.lkey = fmr->mr.lkey;
+ /*
+ * Resources are allocated but no valid mapping (RKEY can't be
+ * used).
+ */
+ fmr->mr.access_flags = mr_access_flags;
+ fmr->mr.max_segs = fmr_attr->max_pages;
+ fmr->mr.page_shift = fmr_attr->page_shift;
+
+ ret = &fmr->ibfmr;
+done:
+ return ret;
+
+bail_mregion:
+ rvt_deinit_mregion(&fmr->mr);
+bail:
+ kfree(fmr);
+ ret = ERR_PTR(rval);
+ goto done;
+}
+
+/**
+ * rvt_map_phys_fmr - set up a fast memory region
+ * @ibmfr: the fast memory region to set up
+ * @page_list: the list of pages to associate with the fast memory region
+ * @list_len: the number of pages to associate with the fast memory region
+ * @iova: the virtual address of the start of the fast memory region
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success
+ */
+
+int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct rvt_fmr *fmr = to_ifmr(ibfmr);
+ struct rvt_lkey_table *rkt;
+ unsigned long flags;
+ int m, n, i;
+ u32 ps;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
+
+ i = atomic_read(&fmr->mr.refcount);
+ if (i > 2)
+ return -EBUSY;
+
+ if (list_len > fmr->mr.max_segs)
+ return -EINVAL;
+
+ rkt = &rdi->lkey_table;
+ spin_lock_irqsave(&rkt->lock, flags);
+ fmr->mr.user_base = iova;
+ fmr->mr.iova = iova;
+ ps = 1 << fmr->mr.page_shift;
+ fmr->mr.length = list_len * ps;
+ m = 0;
+ n = 0;
+ for (i = 0; i < list_len; i++) {
+ fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
+ fmr->mr.map[m]->segs[n].length = ps;
+ if (++n == RVT_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ return 0;
+}
+
+/**
+ * rvt_unmap_fmr - unmap fast memory regions
+ * @fmr_list: the list of fast memory regions to unmap
+ *
+ * Return: 0 on success.
+ */
+int rvt_unmap_fmr(struct list_head *fmr_list)
+{
+ struct rvt_fmr *fmr;
+ struct rvt_lkey_table *rkt;
+ unsigned long flags;
+ struct rvt_dev_info *rdi;
+
+ list_for_each_entry(fmr, fmr_list, ibfmr.list) {
+ rdi = ib_to_rvt(fmr->ibfmr.device);
+ rkt = &rdi->lkey_table;
+ spin_lock_irqsave(&rkt->lock, flags);
+ fmr->mr.user_base = 0;
+ fmr->mr.iova = 0;
+ fmr->mr.length = 0;
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ }
+ return 0;
+}
+
+/**
+ * rvt_dealloc_fmr - deallocate a fast memory region
+ * @ibfmr: the fast memory region to deallocate
+ *
+ * Return: 0 on success.
+ */
+int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+ struct rvt_fmr *fmr = to_ifmr(ibfmr);
+ int ret = 0;
+ unsigned long timeout;
+
+ rvt_free_lkey(&fmr->mr);
+ rvt_put_mr(&fmr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
+ if (!timeout) {
+ rvt_get_mr(&fmr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ rvt_deinit_mregion(&fmr->mr);
+ kfree(fmr);
+out:
+ return ret;
+}
+
+/**
+ * rvt_lkey_ok - check IB SGE for validity and initialize
+ * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
+ * @isge: outgoing internal SGE
+ * @sge: SGE to check
+ * @acc: access flags
+ *
+ * Check the IB SGE for validity and initialize our internal version
+ * of it.
+ *
+ * Return: 1 if valid and successful, otherwise returns 0.
+ *
+ * increments the reference count upon success
+ *
+ */
+int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+ struct rvt_sge *isge, struct ib_sge *sge, int acc)
+{
+ struct rvt_mregion *mr;
+ unsigned n, m;
+ size_t off;
+ struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
+ /*
+ * We use LKEY == zero for kernel virtual addresses
+ * (see rvt_get_dma_mr and dma.c).
+ */
+ rcu_read_lock();
+ if (sge->lkey == 0) {
+ if (pd->user)
+ goto bail;
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
+ goto bail;
+ atomic_inc(&mr->refcount);
+ rcu_read_unlock();
+
+ isge->mr = mr;
+ isge->vaddr = (void *)sge->addr;
+ isge->length = sge->length;
+ isge->sge_length = sge->length;
+ isge->m = 0;
+ isge->n = 0;
+ goto ok;
+ }
+ mr = rcu_dereference(
+ rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ goto bail;
+
+ off = sge->addr - mr->user_base;
+ if (unlikely(sge->addr < mr->user_base ||
+ off + sge->length > mr->length ||
+ (mr->access_flags & acc) != acc))
+ goto bail;
+ atomic_inc(&mr->refcount);
+ rcu_read_unlock();
+
+ off += mr->offset;
+ if (mr->page_shift) {
+ /*
+ * page sizes are uniform power of 2 so no loop is necessary
+ * entries_spanned_by_off is the number of times the loop below
+ * would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off / RVT_SEGSZ;
+ n = entries_spanned_by_off % RVT_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= RVT_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ }
+ isge->mr = mr;
+ isge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ isge->length = mr->map[m]->segs[n].length - off;
+ isge->sge_length = sge->length;
+ isge->m = m;
+ isge->n = n;
+ok:
+ return 1;
+bail:
+ rcu_read_unlock();
+ return 0;
+}
+EXPORT_SYMBOL(rvt_lkey_ok);
+
+/**
+ * rvt_rkey_ok - check the IB virtual address, length, and RKEY
+ * @qp: qp for validation
+ * @sge: SGE state
+ * @len: length of data
+ * @vaddr: virtual address to place data
+ * @rkey: rkey to check
+ * @acc: access flags
+ *
+ * Return: 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
+ */
+int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
+ u32 len, u64 vaddr, u32 rkey, int acc)
+{
+ struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
+ struct rvt_lkey_table *rkt = &dev->lkey_table;
+ struct rvt_mregion *mr;
+ unsigned n, m;
+ size_t off;
+
+ /*
+ * We use RKEY == zero for kernel virtual addresses
+ * (see rvt_get_dma_mr and dma.c).
+ */
+ rcu_read_lock();
+ if (rkey == 0) {
+ struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
+ struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
+
+ if (pd->user)
+ goto bail;
+ mr = rcu_dereference(rdi->dma_mr);
+ if (!mr)
+ goto bail;
+ atomic_inc(&mr->refcount);
+ rcu_read_unlock();
+
+ sge->mr = mr;
+ sge->vaddr = (void *)vaddr;
+ sge->length = len;
+ sge->sge_length = len;
+ sge->m = 0;
+ sge->n = 0;
+ goto ok;
+ }
+
+ mr = rcu_dereference(
+ rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ goto bail;
+
+ off = vaddr - mr->iova;
+ if (unlikely(vaddr < mr->iova || off + len > mr->length ||
+ (mr->access_flags & acc) == 0))
+ goto bail;
+ atomic_inc(&mr->refcount);
+ rcu_read_unlock();
+
+ off += mr->offset;
+ if (mr->page_shift) {
+ /*
+ * page sizes are uniform power of 2 so no loop is necessary
+ * entries_spanned_by_off is the number of times the loop below
+ * would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off / RVT_SEGSZ;
+ n = entries_spanned_by_off % RVT_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= RVT_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ }
+ sge->mr = mr;
+ sge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ sge->length = mr->map[m]->segs[n].length - off;
+ sge->sge_length = len;
+ sge->m = m;
+ sge->n = n;
+ok:
+ return 1;
+bail:
+ rcu_read_unlock();
+ return 0;
+}
+EXPORT_SYMBOL(rvt_rkey_ok);
diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h
new file mode 100644
index 000000000000..69380512c6d1
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mr.h
@@ -0,0 +1,92 @@
+#ifndef DEF_RVTMR_H
+#define DEF_RVTMR_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+struct rvt_fmr {
+ struct ib_fmr ibfmr;
+ struct rvt_mregion mr; /* must be last */
+};
+
+struct rvt_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct rvt_mregion mr; /* must be last */
+};
+
+static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct rvt_fmr, ibfmr);
+}
+
+static inline struct rvt_mr *to_imr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct rvt_mr, ibmr);
+}
+
+int rvt_driver_mr_init(struct rvt_dev_info *rdi);
+void rvt_mr_exit(struct rvt_dev_info *rdi);
+
+/* Mem Regions */
+struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+int rvt_dereg_mr(struct ib_mr *ibmr);
+struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
+struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+int rvt_unmap_fmr(struct list_head *fmr_list);
+int rvt_dealloc_fmr(struct ib_fmr *ibfmr);
+
+#endif /* DEF_RVTMR_H */
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
new file mode 100644
index 000000000000..d1292f324c67
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "pd.h"
+
+/**
+ * rvt_alloc_pd - allocate a protection domain
+ * @ibdev: ib device
+ * @context: optional user context
+ * @udata: optional user data
+ *
+ * Allocate and keep track of a PD.
+ *
+ * Return: 0 on success
+ */
+struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct rvt_dev_info *dev = ib_to_rvt(ibdev);
+ struct rvt_pd *pd;
+ struct ib_pd *ret;
+
+ pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+ /*
+ * While we could continue allocating protecetion domains, being
+ * constrained only by system resources. The IBTA spec defines that
+ * there is a max_pd limit that can be set and we need to check for
+ * that.
+ */
+
+ spin_lock(&dev->n_pds_lock);
+ if (dev->n_pds_allocated == dev->dparms.props.max_pd) {
+ spin_unlock(&dev->n_pds_lock);
+ kfree(pd);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_pds_allocated++;
+ spin_unlock(&dev->n_pds_lock);
+
+ /* ib_alloc_pd() will initialize pd->ibpd. */
+ pd->user = udata ? 1 : 0;
+
+ ret = &pd->ibpd;
+
+bail:
+ return ret;
+}
+
+/**
+ * rvt_dealloc_pd - Free PD
+ * @ibpd: Free up PD
+ *
+ * Return: always 0
+ */
+int rvt_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
+ struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+
+ spin_lock(&dev->n_pds_lock);
+ dev->n_pds_allocated--;
+ spin_unlock(&dev->n_pds_lock);
+
+ kfree(pd);
+
+ return 0;
+}
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
new file mode 100644
index 000000000000..1892ca4a9746
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -0,0 +1,58 @@
+#ifndef DEF_RDMAVTPD_H
+#define DEF_RDMAVTPD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int rvt_dealloc_pd(struct ib_pd *ibpd);
+
+#endif /* DEF_RDMAVTPD_H */
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
new file mode 100644
index 000000000000..bd82a6948dc8
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -0,0 +1,1696 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/hash.h>
+#include <linux/bitops.h>
+#include <linux/lockdep.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+#include "qp.h"
+#include "vt.h"
+#include "trace.h"
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; rvt_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = 0,
+ [IB_QPS_INIT] = RVT_POST_RECV_OK,
+ [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
+ [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+ RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
+ RVT_PROCESS_NEXT_SEND_OK,
+ [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+ RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+ RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+ [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
+ RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+};
+EXPORT_SYMBOL(ib_rvt_state_ops);
+
+static void get_map_page(struct rvt_qpn_table *qpt,
+ struct rvt_qpn_map *map,
+ gfp_t gfp)
+{
+ unsigned long page = get_zeroed_page(gfp);
+
+ /*
+ * Free the page if someone raced with us installing it.
+ */
+
+ spin_lock(&qpt->lock);
+ if (map->page)
+ free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock(&qpt->lock);
+}
+
+/**
+ * init_qpn_table - initialize the QP number table for a device
+ * @qpt: the QPN table
+ */
+static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
+{
+ u32 offset, i;
+ struct rvt_qpn_map *map;
+ int ret = 0;
+
+ if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
+ return -EINVAL;
+
+ spin_lock_init(&qpt->lock);
+
+ qpt->last = rdi->dparms.qpn_start;
+ qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
+
+ /*
+ * Drivers may want some QPs beyond what we need for verbs let them use
+ * our qpn table. No need for two. Lets go ahead and mark the bitmaps
+ * for those. The reserved range must be *after* the range which verbs
+ * will pick from.
+ */
+
+ /* Figure out number of bit maps needed before reserved range */
+ qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
+
+ /* This should always be zero */
+ offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
+
+ /* Starting with the first reserved bit map */
+ map = &qpt->map[qpt->nmaps];
+
+ rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
+ rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
+ for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
+ if (!map->page) {
+ get_map_page(qpt, map, GFP_KERNEL);
+ if (!map->page) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+ set_bit(offset, map->page);
+ offset++;
+ if (offset == RVT_BITS_PER_PAGE) {
+ /* next page */
+ qpt->nmaps++;
+ map++;
+ offset = 0;
+ }
+ }
+ return ret;
+}
+
+/**
+ * free_qpn_table - free the QP number table for a device
+ * @qpt: the QPN table
+ */
+static void free_qpn_table(struct rvt_qpn_table *qpt)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
+ free_page((unsigned long)qpt->map[i].page);
+}
+
+/**
+ * rvt_driver_qp_init - Init driver qp resources
+ * @rdi: rvt dev strucutre
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_qp_init(struct rvt_dev_info *rdi)
+{
+ int i;
+ int ret = -ENOMEM;
+
+ if (!rdi->dparms.qp_table_size)
+ return -EINVAL;
+
+ /*
+ * If driver is not doing any QP allocation then make sure it is
+ * providing the necessary QP functions.
+ */
+ if (!rdi->driver_f.free_all_qps ||
+ !rdi->driver_f.qp_priv_alloc ||
+ !rdi->driver_f.qp_priv_free ||
+ !rdi->driver_f.notify_qp_reset)
+ return -EINVAL;
+
+ /* allocate parent object */
+ rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
+ rdi->dparms.node);
+ if (!rdi->qp_dev)
+ return -ENOMEM;
+
+ /* allocate hash table */
+ rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
+ rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
+ rdi->qp_dev->qp_table =
+ kmalloc_node(rdi->qp_dev->qp_table_size *
+ sizeof(*rdi->qp_dev->qp_table),
+ GFP_KERNEL, rdi->dparms.node);
+ if (!rdi->qp_dev->qp_table)
+ goto no_qp_table;
+
+ for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
+ RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
+
+ spin_lock_init(&rdi->qp_dev->qpt_lock);
+
+ /* initialize qpn map */
+ if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
+ goto fail_table;
+
+ spin_lock_init(&rdi->n_qps_lock);
+
+ return 0;
+
+fail_table:
+ kfree(rdi->qp_dev->qp_table);
+ free_qpn_table(&rdi->qp_dev->qpn_table);
+
+no_qp_table:
+ kfree(rdi->qp_dev);
+
+ return ret;
+}
+
+/**
+ * free_all_qps - check for QPs still in use
+ * @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
+ */
+static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
+{
+ unsigned long flags;
+ struct rvt_qp *qp;
+ unsigned n, qp_inuse = 0;
+ spinlock_t *ql; /* work around too long line below */
+
+ if (rdi->driver_f.free_all_qps)
+ qp_inuse = rdi->driver_f.free_all_qps(rdi);
+
+ qp_inuse += rvt_mcast_tree_empty(rdi);
+
+ if (!rdi->qp_dev)
+ return qp_inuse;
+
+ ql = &rdi->qp_dev->qpt_lock;
+ spin_lock_irqsave(ql, flags);
+ for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
+ qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
+ lockdep_is_held(ql));
+ RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
+
+ for (; qp; qp = rcu_dereference_protected(qp->next,
+ lockdep_is_held(ql)))
+ qp_inuse++;
+ }
+ spin_unlock_irqrestore(ql, flags);
+ synchronize_rcu();
+ return qp_inuse;
+}
+
+/**
+ * rvt_qp_exit - clean up qps on device exit
+ * @rdi: rvt dev structure
+ *
+ * Check for qp leaks and free resources.
+ */
+void rvt_qp_exit(struct rvt_dev_info *rdi)
+{
+ u32 qps_inuse = rvt_free_all_qps(rdi);
+
+ if (qps_inuse)
+ rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
+ qps_inuse);
+ if (!rdi->qp_dev)
+ return;
+
+ kfree(rdi->qp_dev->qp_table);
+ free_qpn_table(&rdi->qp_dev->qpn_table);
+ kfree(rdi->qp_dev);
+}
+
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+ struct rvt_qpn_map *map, unsigned off)
+{
+ return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
+}
+
+/**
+ * alloc_qpn - Allocate the next available qpn or zero/one for QP type
+ * IB_QPT_SMI/IB_QPT_GSI
+ *@rdi: rvt device info structure
+ *@qpt: queue pair number table pointer
+ *@port_num: IB port number, 1 based, comes from core
+ *
+ * Return: The queue pair number
+ */
+static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+ enum ib_qp_type type, u8 port_num, gfp_t gfp)
+{
+ u32 i, offset, max_scan, qpn;
+ struct rvt_qpn_map *map;
+ u32 ret;
+
+ if (rdi->driver_f.alloc_qpn)
+ return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
+
+ if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
+ unsigned n;
+
+ ret = type == IB_QPT_GSI;
+ n = 1 << (ret + 2 * (port_num - 1));
+ spin_lock(&qpt->lock);
+ if (qpt->flags & n)
+ ret = -EINVAL;
+ else
+ qpt->flags |= n;
+ spin_unlock(&qpt->lock);
+ goto bail;
+ }
+
+ qpn = qpt->last + qpt->incr;
+ if (qpn >= RVT_QPN_MAX)
+ qpn = qpt->incr | ((qpt->last & 1) ^ 1);
+ /* offset carries bit 0 */
+ offset = qpn & RVT_BITS_PER_PAGE_MASK;
+ map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
+ max_scan = qpt->nmaps - !offset;
+ for (i = 0;;) {
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map, gfp);
+ if (unlikely(!map->page))
+ break;
+ }
+ do {
+ if (!test_and_set_bit(offset, map->page)) {
+ qpt->last = qpn;
+ ret = qpn;
+ goto bail;
+ }
+ offset += qpt->incr;
+ /*
+ * This qpn might be bogus if offset >= BITS_PER_PAGE.
+ * That is OK. It gets re-assigned below
+ */
+ qpn = mk_qpn(qpt, map, offset);
+ } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
+ /*
+ * In order to keep the number of pages allocated to a
+ * minimum, we scan the all existing pages before increasing
+ * the size of the bitmap table.
+ */
+ if (++i > max_scan) {
+ if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
+ break;
+ map = &qpt->map[qpt->nmaps++];
+ /* start at incr with current bit 0 */
+ offset = qpt->incr | (offset & 1);
+ } else if (map < &qpt->map[qpt->nmaps]) {
+ ++map;
+ /* start at incr with current bit 0 */
+ offset = qpt->incr | (offset & 1);
+ } else {
+ map = &qpt->map[0];
+ /* wrap to first map page, invert bit 0 */
+ offset = qpt->incr | ((offset & 1) ^ 1);
+ }
+ /* there can be no bits at shift and below */
+ WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+ qpn = mk_qpn(qpt, map, offset);
+ }
+
+ ret = -ENOMEM;
+
+bail:
+ return ret;
+}
+
+static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+ struct rvt_qpn_map *map;
+
+ map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+ if (map->page)
+ clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * rvt_clear_mr_refs - Drop help mr refs
+ * @qp: rvt qp data structure
+ * @clr_sends: If shoudl clear send side or not
+ */
+static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
+{
+ unsigned n;
+
+ if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+ rvt_put_ss(&qp->s_rdma_read_sge);
+
+ rvt_put_ss(&qp->r_sge);
+
+ if (clr_sends) {
+ while (qp->s_last != qp->s_head) {
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+ unsigned i;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct rvt_sge *sge = &wqe->sg_list[i];
+
+ rvt_put_mr(sge->mr);
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ atomic_dec(&ibah_to_rvtah(
+ wqe->ud_wr.ah)->refcount);
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ smp_wmb(); /* see qp_set_savail */
+ }
+ if (qp->s_rdma_mr) {
+ rvt_put_mr(qp->s_rdma_mr);
+ qp->s_rdma_mr = NULL;
+ }
+ }
+
+ if (qp->ibqp.qp_type != IB_QPT_RC)
+ return;
+
+ for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
+ struct rvt_ack_entry *e = &qp->s_ack_queue[n];
+
+ if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
+ e->rdma_sge.mr) {
+ rvt_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+ }
+}
+
+/**
+ * rvt_remove_qp - remove qp form table
+ * @rdi: rvt dev struct
+ * @qp: qp to remove
+ *
+ * Remove the QP from the table so it can't be found asynchronously by
+ * the receive routine.
+ */
+static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+ struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+ u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
+ unsigned long flags;
+ int removed = 1;
+
+ spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+
+ if (rcu_dereference_protected(rvp->qp[0],
+ lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
+ RCU_INIT_POINTER(rvp->qp[0], NULL);
+ } else if (rcu_dereference_protected(rvp->qp[1],
+ lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
+ RCU_INIT_POINTER(rvp->qp[1], NULL);
+ } else {
+ struct rvt_qp *q;
+ struct rvt_qp __rcu **qpp;
+
+ removed = 0;
+ qpp = &rdi->qp_dev->qp_table[n];
+ for (; (q = rcu_dereference_protected(*qpp,
+ lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
+ qpp = &q->next) {
+ if (q == qp) {
+ RCU_INIT_POINTER(*qpp,
+ rcu_dereference_protected(qp->next,
+ lockdep_is_held(&rdi->qp_dev->qpt_lock)));
+ removed = 1;
+ trace_rvt_qpremove(qp, n);
+ break;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
+ if (removed) {
+ synchronize_rcu();
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
+/**
+ * reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ * r and s lock are required to be held by the caller
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
+{
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+
+ /* Let drivers flush their waitlist */
+ rdi->driver_f.flush_qp_waiters(qp);
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+
+ /* Stop the send queue and the retry timer */
+ rdi->driver_f.stop_send_queue(qp);
+
+ /* Wait for things to stop */
+ rdi->driver_f.quiesce_qp(qp);
+
+ /* take qp out the hash and wait for it to be unused */
+ rvt_remove_qp(rdi, qp);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+ /* grab the lock b/c it was locked at call time */
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+
+ rvt_clear_mr_refs(qp, 1);
+ }
+
+ /*
+ * Let the driver do any tear down it needs to for a qp
+ * that has been reset
+ */
+ rdi->driver_f.notify_qp_reset(qp);
+
+ qp->remote_qpn = 0;
+ qp->qkey = 0;
+ qp->qp_access_flags = 0;
+ qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
+ qp->s_hdrwords = 0;
+ qp->s_wqe = NULL;
+ qp->s_draining = 0;
+ qp->s_next_psn = 0;
+ qp->s_last_psn = 0;
+ qp->s_sending_psn = 0;
+ qp->s_sending_hpsn = 0;
+ qp->s_psn = 0;
+ qp->r_psn = 0;
+ qp->r_msn = 0;
+ if (type == IB_QPT_RC) {
+ qp->s_state = IB_OPCODE_RC_SEND_LAST;
+ qp->r_state = IB_OPCODE_RC_SEND_LAST;
+ } else {
+ qp->s_state = IB_OPCODE_UC_SEND_LAST;
+ qp->r_state = IB_OPCODE_UC_SEND_LAST;
+ }
+ qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+ qp->r_nak_state = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
+ qp->s_head = 0;
+ qp->s_tail = 0;
+ qp->s_cur = 0;
+ qp->s_acked = 0;
+ qp->s_last = 0;
+ qp->s_ssn = 1;
+ qp->s_lsn = 0;
+ qp->s_mig_state = IB_MIG_MIGRATED;
+ memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ qp->r_head_ack_queue = 0;
+ qp->s_tail_ack_queue = 0;
+ qp->s_num_rd_atomic = 0;
+ if (qp->r_rq.wq) {
+ qp->r_rq.wq->head = 0;
+ qp->r_rq.wq->tail = 0;
+ }
+ qp->r_sge.num_sge = 0;
+}
+
+/**
+ * rvt_create_qp - create a queue pair for a device
+ * @ibpd: the protection domain who's device we create the queue pair for
+ * @init_attr: the attributes of the queue pair
+ * @udata: user data for libibverbs.so
+ *
+ * Queue pair creation is mostly an rvt issue. However, drivers have their own
+ * unique idea of what queue pair numbers mean. For instance there is a reserved
+ * range for PSM.
+ *
+ * Return: the queue pair on success, otherwise returns an errno.
+ *
+ * Called by the ib_create_qp() core verbs function.
+ */
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct rvt_qp *qp;
+ int err;
+ struct rvt_swqe *swq = NULL;
+ size_t sz;
+ size_t sg_list_sz;
+ struct ib_qp *ret = ERR_PTR(-ENOMEM);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+ void *priv = NULL;
+ gfp_t gfp;
+
+ if (!rdi)
+ return ERR_PTR(-EINVAL);
+
+ if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+ init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
+ init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+ return ERR_PTR(-EINVAL);
+
+ /* GFP_NOIO is applicable to RC QP's only */
+
+ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+ init_attr->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+
+ gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+ GFP_NOIO : GFP_KERNEL;
+
+ /* Check receive queue parameters if no SRQ is specified. */
+ if (!init_attr->srq) {
+ if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+ init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
+ return ERR_PTR(-EINVAL);
+
+ if (init_attr->cap.max_send_sge +
+ init_attr->cap.max_send_wr +
+ init_attr->cap.max_recv_sge +
+ init_attr->cap.max_recv_wr == 0)
+ return ERR_PTR(-EINVAL);
+ }
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ if (init_attr->port_num == 0 ||
+ init_attr->port_num > ibpd->device->phys_port_cnt)
+ return ERR_PTR(-EINVAL);
+ case IB_QPT_UC:
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ sz = sizeof(struct rvt_sge) *
+ init_attr->cap.max_send_sge +
+ sizeof(struct rvt_swqe);
+ if (gfp == GFP_NOIO)
+ swq = __vmalloc(
+ (init_attr->cap.max_send_wr + 1) * sz,
+ gfp, PAGE_KERNEL);
+ else
+ swq = vmalloc_node(
+ (init_attr->cap.max_send_wr + 1) * sz,
+ rdi->dparms.node);
+ if (!swq)
+ return ERR_PTR(-ENOMEM);
+
+ sz = sizeof(*qp);
+ sg_list_sz = 0;
+ if (init_attr->srq) {
+ struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
+
+ if (srq->rq.max_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (srq->rq.max_sge - 1);
+ } else if (init_attr->cap.max_recv_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (init_attr->cap.max_recv_sge - 1);
+ qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
+ if (!qp)
+ goto bail_swq;
+
+ RCU_INIT_POINTER(qp->next, NULL);
+
+ /*
+ * Driver needs to set up it's private QP structure and do any
+ * initialization that is needed.
+ */
+ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
+ if (!priv)
+ goto bail_qp;
+ qp->priv = priv;
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL);
+ if (init_attr->srq) {
+ sz = 0;
+ } else {
+ qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+ qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+ sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+ sizeof(struct rvt_rwqe);
+ if (udata)
+ qp->r_rq.wq = vmalloc_user(
+ sizeof(struct rvt_rwq) +
+ qp->r_rq.size * sz);
+ else if (gfp == GFP_NOIO)
+ qp->r_rq.wq = __vmalloc(
+ sizeof(struct rvt_rwq) +
+ qp->r_rq.size * sz,
+ gfp, PAGE_KERNEL);
+ else
+ qp->r_rq.wq = vmalloc_node(
+ sizeof(struct rvt_rwq) +
+ qp->r_rq.size * sz,
+ rdi->dparms.node);
+ if (!qp->r_rq.wq)
+ goto bail_driver_priv;
+ }
+
+ /*
+ * ib_create_qp() will initialize qp->ibqp
+ * except for qp->ibqp.qp_num.
+ */
+ spin_lock_init(&qp->r_lock);
+ spin_lock_init(&qp->s_hlock);
+ spin_lock_init(&qp->s_lock);
+ spin_lock_init(&qp->r_rq.lock);
+ atomic_set(&qp->refcount, 0);
+ init_waitqueue_head(&qp->wait);
+ init_timer(&qp->s_timer);
+ qp->s_timer.data = (unsigned long)qp;
+ INIT_LIST_HEAD(&qp->rspwait);
+ qp->state = IB_QPS_RESET;
+ qp->s_wq = swq;
+ qp->s_size = init_attr->cap.max_send_wr + 1;
+ qp->s_avail = init_attr->cap.max_send_wr;
+ qp->s_max_sge = init_attr->cap.max_send_sge;
+ if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+ qp->s_flags = RVT_S_SIGNAL_REQ_WR;
+
+ err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
+ init_attr->qp_type,
+ init_attr->port_num, gfp);
+ if (err < 0) {
+ ret = ERR_PTR(err);
+ goto bail_rq_wq;
+ }
+ qp->ibqp.qp_num = err;
+ qp->port_num = init_attr->port_num;
+ rvt_reset_qp(rdi, qp, init_attr->qp_type);
+ break;
+
+ default:
+ /* Don't support raw QPs */
+ return ERR_PTR(-EINVAL);
+ }
+
+ init_attr->cap.max_inline_data = 0;
+
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See rvt_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ if (!qp->r_rq.wq) {
+ __u64 offset = 0;
+
+ err = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_qpn;
+ }
+ } else {
+ u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
+
+ qp->ip = rvt_create_mmap_info(rdi, s,
+ ibpd->uobject->context,
+ qp->r_rq.wq);
+ if (!qp->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qpn;
+ }
+
+ err = ib_copy_to_udata(udata, &qp->ip->offset,
+ sizeof(qp->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ }
+ qp->pid = current->pid;
+ }
+
+ spin_lock(&rdi->n_qps_lock);
+ if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
+ spin_unlock(&rdi->n_qps_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ rdi->n_qps_allocated++;
+ /*
+ * Maintain a busy_jiffies variable that will be added to the timeout
+ * period in mod_retry_timer and add_retry_timer. This busy jiffies
+ * is scaled by the number of rc qps created for the device to reduce
+ * the number of timeouts occurring when there is a large number of
+ * qps. busy_jiffies is incremented every rc qp scaling interval.
+ * The scaling interval is selected based on extensive performance
+ * evaluation of targeted workloads.
+ */
+ if (init_attr->qp_type == IB_QPT_RC) {
+ rdi->n_rc_qps++;
+ rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
+ }
+ spin_unlock(&rdi->n_qps_lock);
+
+ if (qp->ip) {
+ spin_lock_irq(&rdi->pending_lock);
+ list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
+ spin_unlock_irq(&rdi->pending_lock);
+ }
+
+ ret = &qp->ibqp;
+
+ /*
+ * We have our QP and its good, now keep track of what types of opcodes
+ * can be processed on this QP. We do this by keeping track of what the
+ * 3 high order bits of the opcode are.
+ */
+ switch (init_attr->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ break;
+ case IB_QPT_RC:
+ qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ break;
+ case IB_QPT_UC:
+ qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ break;
+ default:
+ ret = ERR_PTR(-EINVAL);
+ goto bail_ip;
+ }
+
+ return ret;
+
+bail_ip:
+ kref_put(&qp->ip->ref, rvt_release_mmap_info);
+
+bail_qpn:
+ free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+bail_rq_wq:
+ vfree(qp->r_rq.wq);
+
+bail_driver_priv:
+ rdi->driver_f.qp_priv_free(rdi, qp);
+
+bail_qp:
+ kfree(qp);
+
+bail_swq:
+ vfree(swq);
+
+ return ret;
+}
+
+/**
+ * rvt_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
+ * @err: the receive completion error to signal if a RWQE is active
+ *
+ * Flushes both send and receive work queues.
+ *
+ * Return: true if last WQE event should be generated.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
+ */
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
+{
+ struct ib_wc wc;
+ int ret = 0;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
+ goto bail;
+
+ qp->state = IB_QPS_ERR;
+
+ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ }
+
+ if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
+ qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
+
+ rdi->driver_f.notify_error_qp(qp);
+
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (ACCESS_ONCE(qp->s_last) != qp->s_head)
+ rdi->driver_f.schedule_send(qp);
+
+ rvt_clear_mr_refs(qp, 0);
+
+ memset(&wc, 0, sizeof(wc));
+ wc.qp = &qp->ibqp;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
+ wc.wr_id = qp->r_wr_id;
+ wc.status = err;
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wc.status = IB_WC_WR_FLUSH_ERR;
+
+ if (qp->r_rq.wq) {
+ struct rvt_rwq *wq;
+ u32 head;
+ u32 tail;
+
+ spin_lock(&qp->r_rq.lock);
+
+ /* sanity check pointers before trusting them */
+ wq = qp->r_rq.wq;
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ while (tail != head) {
+ wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+ if (++tail >= qp->r_rq.size)
+ tail = 0;
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wq->tail = tail;
+
+ spin_unlock(&qp->r_rq.lock);
+ } else if (qp->ibqp.event_handler) {
+ ret = 1;
+ }
+
+bail:
+ return ret;
+}
+EXPORT_SYMBOL(rvt_error_qp);
+
+/*
+ * Put the QP into the hash table.
+ * The hash table holds a reference to the QP.
+ */
+static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+ struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+ unsigned long flags;
+
+ atomic_inc(&qp->refcount);
+ spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+
+ if (qp->ibqp.qp_num <= 1) {
+ rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
+ } else {
+ u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
+
+ qp->next = rdi->qp_dev->qp_table[n];
+ rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
+ trace_rvt_qpinsert(qp, n);
+ }
+
+ spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
+}
+
+/**
+ * qib_modify_qp - modify the attributes of a queue pair
+ * @ibqp: the queue pair who's attributes we're modifying
+ * @attr: the new attributes
+ * @attr_mask: the mask of attributes to modify
+ * @udata: user data for libibverbs.so
+ *
+ * Return: 0 on success, otherwise returns an errno.
+ */
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ struct ib_event ev;
+ int lastwqe = 0;
+ int mig = 0;
+ int pmtu = 0; /* for gcc warning only */
+ enum rdma_link_layer link;
+
+ link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
+
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ?
+ attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask, link))
+ goto inval;
+
+ if (rdi->driver_f.check_modify_qp &&
+ rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
+ goto inval;
+
+ if (attr_mask & IB_QP_AV) {
+ if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ goto inval;
+ if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_ah_attr.dlid >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE))
+ goto inval;
+ if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
+ goto inval;
+ if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ if (attr->pkey_index >= rvt_get_npkeys(rdi))
+ goto inval;
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ if (attr->min_rnr_timer > 31)
+ goto inval;
+
+ if (attr_mask & IB_QP_PORT)
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI ||
+ attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt)
+ goto inval;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ if (attr->dest_qp_num > RVT_QPN_MASK)
+ goto inval;
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ if (attr->retry_cnt > 7)
+ goto inval;
+
+ if (attr_mask & IB_QP_RNR_RETRY)
+ if (attr->rnr_retry > 7)
+ goto inval;
+
+ /*
+ * Don't allow invalid path_mtu values. OK to set greater
+ * than the active mtu (or even the max_cap, if we have tuned
+ * that to a small mtu. We'll set qp->path_mtu
+ * to the lesser of requested attribute mtu and active,
+ * for packetizing messages.
+ * Note that the QP port has to be set in INIT and MTU in RTR.
+ */
+ if (attr_mask & IB_QP_PATH_MTU) {
+ pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
+ if (pmtu < 0)
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE) {
+ if (attr->path_mig_state == IB_MIG_REARM) {
+ if (qp->s_mig_state == IB_MIG_ARMED)
+ goto inval;
+ if (new_state != IB_QPS_RTS)
+ goto inval;
+ } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
+ if (qp->s_mig_state == IB_MIG_REARM)
+ goto inval;
+ if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
+ goto inval;
+ if (qp->s_mig_state == IB_MIG_ARMED)
+ mig = 1;
+ } else {
+ goto inval;
+ }
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
+ goto inval;
+
+ switch (new_state) {
+ case IB_QPS_RESET:
+ if (qp->state != IB_QPS_RESET)
+ rvt_reset_qp(rdi, qp, ibqp->qp_type);
+ break;
+
+ case IB_QPS_RTR:
+ /* Allow event to re-trigger if QP set to RTR more than once */
+ qp->r_flags &= ~RVT_R_COMM_EST;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQD:
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_ERR:
+ lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ break;
+
+ default:
+ qp->state = new_state;
+ break;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ qp->s_pkey_index = attr->pkey_index;
+
+ if (attr_mask & IB_QP_PORT)
+ qp->port_num = attr->port_num;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ qp->remote_qpn = attr->dest_qp_num;
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
+ qp->s_psn = qp->s_next_psn;
+ qp->s_sending_psn = qp->s_next_psn;
+ qp->s_last_psn = qp->s_next_psn - 1;
+ qp->s_sending_hpsn = qp->s_last_psn;
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN)
+ qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->qp_access_flags = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_AV) {
+ qp->remote_ah_attr = attr->ah_attr;
+ qp->s_srate = attr->ah_attr.static_rate;
+ qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ qp->alt_ah_attr = attr->alt_ah_attr;
+ qp->s_alt_pkey_index = attr->alt_pkey_index;
+ }
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE) {
+ qp->s_mig_state = attr->path_mig_state;
+ if (mig) {
+ qp->remote_ah_attr = qp->alt_ah_attr;
+ qp->port_num = qp->alt_ah_attr.port_num;
+ qp->s_pkey_index = qp->s_alt_pkey_index;
+ }
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
+ qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
+ qp->log_pmtu = ilog2(qp->pmtu);
+ }
+
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ qp->s_retry_cnt = attr->retry_cnt;
+ qp->s_retry = attr->retry_cnt;
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ qp->s_rnr_retry_cnt = attr->rnr_retry;
+ qp->s_rnr_retry = attr->rnr_retry;
+ }
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ qp->r_min_rnr_timer = attr->min_rnr_timer;
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ qp->timeout = attr->timeout;
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL);
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ qp->qkey = attr->qkey;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ qp->s_max_rd_atomic = attr->max_rd_atomic;
+
+ if (rdi->driver_f.modify_qp)
+ rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
+
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ rvt_insert_qp(rdi, qp);
+
+ if (lastwqe) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ if (mig) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_PATH_MIG;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ return 0;
+
+inval:
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+ return -EINVAL;
+}
+
+/** rvt_free_qpn - Free a qpn from the bit map
+ * @qpt: QP table
+ * @qpn: queue pair number to free
+ */
+static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+ struct rvt_qpn_map *map;
+
+ map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+ if (map->page)
+ clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * rvt_destroy_qp - destroy a queue pair
+ * @ibqp: the queue pair to destroy
+ *
+ * Note that this can be called while the QP is actively sending or
+ * receiving!
+ *
+ * Return: 0 on success.
+ */
+int rvt_destroy_qp(struct ib_qp *ibqp)
+{
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+ rvt_reset_qp(rdi, qp, ibqp->qp_type);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+
+ /* qpn is now available for use again */
+ rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+ spin_lock(&rdi->n_qps_lock);
+ rdi->n_qps_allocated--;
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ rdi->n_rc_qps--;
+ rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
+ }
+ spin_unlock(&rdi->n_qps_lock);
+
+ if (qp->ip)
+ kref_put(&qp->ip->ref, rvt_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
+ vfree(qp->s_wq);
+ rdi->driver_f.qp_priv_free(rdi, qp);
+ kfree(qp);
+ return 0;
+}
+
+/**
+ * rvt_query_qp - query an ipbq
+ * @ibqp: IB qp to query
+ * @attr: attr struct to fill in
+ * @attr_mask: attr mask ignored
+ * @init_attr: struct to fill in
+ *
+ * Return: always 0
+ */
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ attr->qp_state = qp->state;
+ attr->cur_qp_state = attr->qp_state;
+ attr->path_mtu = qp->path_mtu;
+ attr->path_mig_state = qp->s_mig_state;
+ attr->qkey = qp->qkey;
+ attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
+ attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
+ attr->dest_qp_num = qp->remote_qpn;
+ attr->qp_access_flags = qp->qp_access_flags;
+ attr->cap.max_send_wr = qp->s_size - 1;
+ attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
+ attr->cap.max_send_sge = qp->s_max_sge;
+ attr->cap.max_recv_sge = qp->r_rq.max_sge;
+ attr->cap.max_inline_data = 0;
+ attr->ah_attr = qp->remote_ah_attr;
+ attr->alt_ah_attr = qp->alt_ah_attr;
+ attr->pkey_index = qp->s_pkey_index;
+ attr->alt_pkey_index = qp->s_alt_pkey_index;
+ attr->en_sqd_async_notify = 0;
+ attr->sq_draining = qp->s_draining;
+ attr->max_rd_atomic = qp->s_max_rd_atomic;
+ attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
+ attr->min_rnr_timer = qp->r_min_rnr_timer;
+ attr->port_num = qp->port_num;
+ attr->timeout = qp->timeout;
+ attr->retry_cnt = qp->s_retry_cnt;
+ attr->rnr_retry = qp->s_rnr_retry_cnt;
+ attr->alt_port_num = qp->alt_ah_attr.port_num;
+ attr->alt_timeout = qp->alt_timeout;
+
+ init_attr->event_handler = qp->ibqp.event_handler;
+ init_attr->qp_context = qp->ibqp.qp_context;
+ init_attr->send_cq = qp->ibqp.send_cq;
+ init_attr->recv_cq = qp->ibqp.recv_cq;
+ init_attr->srq = qp->ibqp.srq;
+ init_attr->cap = attr->cap;
+ if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ else
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->qp_type = qp->ibqp.qp_type;
+ init_attr->port_num = qp->port_num;
+ return 0;
+}
+
+/**
+ * rvt_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success otherwise errno
+ */
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_rwq *wq = qp->r_rq.wq;
+ unsigned long flags;
+ int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
+ !qp->ibqp.srq;
+
+ /* Check that state is OK to post receive. */
+ if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ for (; wr; wr = wr->next) {
+ struct rvt_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->r_rq.lock, flags);
+ next = wq->head + 1;
+ if (next >= qp->r_rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ *bad_wr = wr;
+ return -ENOMEM;
+ }
+ if (unlikely(qp_err_flush)) {
+ struct ib_wc wc;
+
+ memset(&wc, 0, sizeof(wc));
+ wc.qp = &qp->ibqp;
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = wr->wr_id;
+ wc.status = IB_WC_WR_FLUSH_ERR;
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+ } else {
+ wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /*
+ * Make sure queue entry is written
+ * before the head index.
+ */
+ smp_wmb();
+ wq->head = next;
+ }
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ }
+ return 0;
+}
+
+/**
+ * qp_get_savail - return number of avail send entries
+ *
+ * @qp - the qp
+ *
+ * This assumes the s_hlock is held but the s_last
+ * qp variable is uncontrolled.
+ */
+static inline u32 qp_get_savail(struct rvt_qp *qp)
+{
+ u32 slast;
+ u32 ret;
+
+ smp_read_barrier_depends(); /* see rc.c */
+ slast = ACCESS_ONCE(qp->s_last);
+ if (qp->s_head >= slast)
+ ret = qp->s_size - (qp->s_head - slast);
+ else
+ ret = slast - qp->s_head;
+ return ret - 1;
+}
+
+/**
+ * rvt_post_one_wr - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int rvt_post_one_wr(struct rvt_qp *qp,
+ struct ib_send_wr *wr,
+ int *call_send)
+{
+ struct rvt_swqe *wqe;
+ u32 next;
+ int i;
+ int j;
+ int acc;
+ struct rvt_lkey_table *rkt;
+ struct rvt_pd *pd;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ u8 log_pmtu;
+ int ret;
+
+ /* IB spec says that num_sge == 0 is OK. */
+ if (unlikely(wr->num_sge > qp->s_max_sge))
+ return -EINVAL;
+
+ /*
+ * Don't allow RDMA reads or atomic operations on UC or
+ * undefined operations.
+ * Make sure buffer is large enough to hold the result for atomics.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
+ return -EINVAL;
+ } else if (qp->ibqp.qp_type != IB_QPT_RC) {
+ /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+ /* Check UD destination address PD */
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+ return -EINVAL;
+ } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
+ return -EINVAL;
+ } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1))) {
+ return -EINVAL;
+ } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+ return -EINVAL;
+ }
+ /* check for avail */
+ if (unlikely(!qp->s_avail)) {
+ qp->s_avail = qp_get_savail(qp);
+ if (WARN_ON(qp->s_avail > (qp->s_size - 1)))
+ rvt_pr_err(rdi,
+ "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
+ qp->ibqp.qp_num, qp->s_size, qp->s_avail,
+ qp->s_head, qp->s_tail, qp->s_cur,
+ qp->s_acked, qp->s_last);
+ if (!qp->s_avail)
+ return -ENOMEM;
+ }
+ next = qp->s_head + 1;
+ if (next >= qp->s_size)
+ next = 0;
+
+ rkt = &rdi->lkey_table;
+ pd = ibpd_to_rvtpd(qp->ibqp.pd);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_head);
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
+ wqe->length = 0;
+ j = 0;
+ if (wr->num_sge) {
+ acc = wr->opcode >= IB_WR_RDMA_READ ?
+ IB_ACCESS_LOCAL_WRITE : 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ u32 length = wr->sg_list[i].length;
+ int ok;
+
+ if (length == 0)
+ continue;
+ ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
+ &wr->sg_list[i], acc);
+ if (!ok) {
+ ret = -EINVAL;
+ goto bail_inval_free;
+ }
+ wqe->length += length;
+ j++;
+ }
+ wqe->wr.num_sge = j;
+ }
+
+ /* general part of wqe valid - allow for driver checks */
+ if (rdi->driver_f.check_send_wqe) {
+ ret = rdi->driver_f.check_send_wqe(qp, wqe);
+ if (ret < 0)
+ goto bail_inval_free;
+ if (ret)
+ *call_send = ret;
+ }
+
+ log_pmtu = qp->log_pmtu;
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC) {
+ struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
+
+ log_pmtu = ah->log_pmtu;
+ atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+ }
+
+ wqe->ssn = qp->s_ssn++;
+ wqe->psn = qp->s_next_psn;
+ wqe->lpsn = wqe->psn +
+ (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0);
+ qp->s_next_psn = wqe->lpsn + 1;
+ trace_rvt_post_one_wr(qp, wqe);
+ smp_wmb(); /* see request builders */
+ qp->s_avail--;
+ qp->s_head = next;
+
+ return 0;
+
+bail_inval_free:
+ /* release mr holds */
+ while (j) {
+ struct rvt_sge *sge = &wqe->sg_list[--j];
+
+ rvt_put_mr(sge->mr);
+ }
+ return ret;
+}
+
+/**
+ * rvt_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success else errno
+ */
+int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ unsigned long flags = 0;
+ int call_send;
+ unsigned nreq = 0;
+ int err = 0;
+
+ spin_lock_irqsave(&qp->s_hlock, flags);
+
+ /*
+ * Ensure QP state is such that we can send. If not bail out early,
+ * there is no need to do this every time we post a send.
+ */
+ if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
+ spin_unlock_irqrestore(&qp->s_hlock, flags);
+ return -EINVAL;
+ }
+
+ /*
+ * If the send queue is empty, and we only have a single WR then just go
+ * ahead and kick the send engine into gear. Otherwise we will always
+ * just schedule the send to happen later.
+ */
+ call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
+
+ for (; wr; wr = wr->next) {
+ err = rvt_post_one_wr(qp, wr, &call_send);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto bail;
+ }
+ nreq++;
+ }
+bail:
+ spin_unlock_irqrestore(&qp->s_hlock, flags);
+ if (nreq) {
+ if (call_send)
+ rdi->driver_f.schedule_send_no_lock(qp);
+ else
+ rdi->driver_f.do_send(qp);
+ }
+ return err;
+}
+
+/**
+ * rvt_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: A pointer to the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success else errno
+ */
+int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+ struct rvt_rwq *wq;
+ unsigned long flags;
+
+ for (; wr; wr = wr->next) {
+ struct rvt_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned)wr->num_sge > srq->rq.max_sge) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&srq->rq.lock, flags);
+ wq = srq->rq.wq;
+ next = wq->head + 1;
+ if (next >= srq->rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ *bad_wr = wr;
+ return -ENOMEM;
+ }
+
+ wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
new file mode 100644
index 000000000000..8409f80d5f25
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -0,0 +1,69 @@
+#ifndef DEF_RVTQP_H
+#define DEF_RVTQP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_driver_qp_init(struct rvt_dev_info *rdi);
+void rvt_qp_exit(struct rvt_dev_info *rdi);
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int rvt_destroy_qp(struct ib_qp *ibqp);
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+#endif /* DEF_RVTQP_H */
diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 67786d417493..f7c48e9023de 100644
--- a/drivers/staging/rdma/hfi1/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -52,96 +49,50 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include "verbs.h"
+#include "srq.h"
+#include "vt.h"
/**
- * hfi1_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
+ * rvt_driver_srq_init - init srq resources on a per driver basis
+ * @rdi: rvt dev structure
*
- * This may be called from interrupt context.
+ * Do any initialization needed when a driver registers with rdmavt.
*/
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+void rvt_driver_srq_init(struct rvt_dev_info *rdi)
{
- struct hfi1_srq *srq = to_isrq(ibsrq);
- struct hfi1_rwq *wq;
- unsigned long flags;
- int ret;
-
- for (; wr; wr = wr->next) {
- struct hfi1_rwqe *wqe;
- u32 next;
- int i;
-
- if ((unsigned) wr->num_sge > srq->rq.max_sge) {
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
-
- spin_lock_irqsave(&srq->rq.lock, flags);
- wq = srq->rq.wq;
- next = wq->head + 1;
- if (next >= srq->rq.size)
- next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- *bad_wr = wr;
- ret = -ENOMEM;
- goto bail;
- }
-
- wqe = get_rwqe_ptr(&srq->rq, wq->head);
- wqe->wr_id = wr->wr_id;
- wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
- /* Make sure queue entry is written before the head index. */
- smp_wmb();
- wq->head = next;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- }
- ret = 0;
-
-bail:
- return ret;
+ spin_lock_init(&rdi->n_srqs_lock);
+ rdi->n_srqs_allocated = 0;
}
/**
- * hfi1_create_srq - create a shared receive queue
+ * rvt_create_srq - create a shared receive queue
* @ibpd: the protection domain of the SRQ to create
* @srq_init_attr: the attributes of the SRQ
* @udata: data from libibverbs when creating a user SRQ
+ *
+ * Return: Allocated srq object
*/
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata)
+struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
{
- struct hfi1_ibdev *dev = to_idev(ibpd->device);
- struct hfi1_srq *srq;
+ struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+ struct rvt_srq *srq;
u32 sz;
struct ib_srq *ret;
- if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
- ret = ERR_PTR(-ENOSYS);
- goto done;
- }
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+ return ERR_PTR(-ENOSYS);
if (srq_init_attr->attr.max_sge == 0 ||
- srq_init_attr->attr.max_sge > hfi1_max_srq_sges ||
+ srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge ||
srq_init_attr->attr.max_wr == 0 ||
- srq_init_attr->attr.max_wr > hfi1_max_srq_wrs) {
- ret = ERR_PTR(-EINVAL);
- goto done;
- }
+ srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr)
+ return ERR_PTR(-EINVAL);
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
- if (!srq) {
- ret = ERR_PTR(-ENOMEM);
- goto done;
- }
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
/*
* Need to use vmalloc() if we want to support large #s of entries.
@@ -149,8 +100,8 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
srq->rq.size = srq_init_attr->attr.max_wr + 1;
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
- sizeof(struct hfi1_rwqe);
- srq->rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + srq->rq.size * sz);
+ sizeof(struct rvt_rwqe);
+ srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz);
if (!srq->rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_srq;
@@ -158,15 +109,15 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
/*
* Return the address of the RWQ as the offset to mmap.
- * See hfi1_mmap() for details.
+ * See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
- u32 s = sizeof(struct hfi1_rwq) + srq->rq.size * sz;
+ u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
srq->ip =
- hfi1_create_mmap_info(dev, s, ibpd->uobject->context,
- srq->rq.wq);
+ rvt_create_mmap_info(dev, s, ibpd->uobject->context,
+ srq->rq.wq);
if (!srq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wq;
@@ -178,8 +129,9 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
ret = ERR_PTR(err);
goto bail_ip;
}
- } else
+ } else {
srq->ip = NULL;
+ }
/*
* ib_create_srq() will initialize srq->ibsrq.
@@ -190,7 +142,7 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
srq->limit = srq_init_attr->attr.srq_limit;
spin_lock(&dev->n_srqs_lock);
- if (dev->n_srqs_allocated == hfi1_max_srqs) {
+ if (dev->n_srqs_allocated == dev->dparms.props.max_srq) {
spin_unlock(&dev->n_srqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
@@ -205,8 +157,7 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
spin_unlock_irq(&dev->pending_lock);
}
- ret = &srq->ibsrq;
- goto done;
+ return &srq->ibsrq;
bail_ip:
kfree(srq->ip);
@@ -214,46 +165,44 @@ bail_wq:
vfree(srq->rq.wq);
bail_srq:
kfree(srq);
-done:
return ret;
}
/**
- * hfi1_modify_srq - modify a shared receive queue
+ * rvt_modify_srq - modify a shared receive queue
* @ibsrq: the SRQ to modify
* @attr: the new attributes of the SRQ
* @attr_mask: indicates which attributes to modify
* @udata: user data for libibverbs.so
+ *
+ * Return: 0 on success
*/
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask,
- struct ib_udata *udata)
+int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
{
- struct hfi1_srq *srq = to_isrq(ibsrq);
- struct hfi1_rwq *wq;
+ struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+ struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
+ struct rvt_rwq *wq;
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
- struct hfi1_rwq *owq;
- struct hfi1_rwqe *p;
+ struct rvt_rwq *owq;
+ struct rvt_rwqe *p;
u32 sz, size, n, head, tail;
/* Check that the requested sizes are below the limits. */
- if ((attr->max_wr > hfi1_max_srq_wrs) ||
+ if ((attr->max_wr > dev->dparms.props.max_srq_wr) ||
((attr_mask & IB_SRQ_LIMIT) ?
- attr->srq_limit : srq->limit) > attr->max_wr) {
- ret = -EINVAL;
- goto bail;
- }
+ attr->srq_limit : srq->limit) > attr->max_wr)
+ return -EINVAL;
- sz = sizeof(struct hfi1_rwqe) +
+ sz = sizeof(struct rvt_rwqe) +
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
- wq = vmalloc_user(sizeof(struct hfi1_rwq) + size * sz);
- if (!wq) {
- ret = -ENOMEM;
- goto bail;
- }
+ wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz);
+ if (!wq)
+ return -ENOMEM;
/* Check that we can write the offset to mmap. */
if (udata && udata->inlen >= sizeof(__u64)) {
@@ -264,8 +213,8 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
sizeof(offset_addr));
if (ret)
goto bail_free;
- udata->outbuf =
- (void __user *) (unsigned long) offset_addr;
+ udata->outbuf = (void __user *)
+ (unsigned long)offset_addr;
ret = ib_copy_to_udata(udata, &offset,
sizeof(offset));
if (ret)
@@ -296,16 +245,16 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
n = 0;
p = wq->wq;
while (tail != head) {
- struct hfi1_rwqe *wqe;
+ struct rvt_rwqe *wqe;
int i;
- wqe = get_rwqe_ptr(&srq->rq, tail);
+ wqe = rvt_get_rwqe_ptr(&srq->rq, tail);
p->wr_id = wqe->wr_id;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
- p = (struct hfi1_rwqe *)((char *)p + sz);
+ p = (struct rvt_rwqe *)((char *)p + sz);
if (++tail >= srq->rq.size)
tail = 0;
}
@@ -320,21 +269,21 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
vfree(owq);
if (srq->ip) {
- struct hfi1_mmap_info *ip = srq->ip;
- struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device);
- u32 s = sizeof(struct hfi1_rwq) + size * sz;
+ struct rvt_mmap_info *ip = srq->ip;
+ struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device);
+ u32 s = sizeof(struct rvt_rwq) + size * sz;
- hfi1_update_mmap_info(dev, ip, s, wq);
+ rvt_update_mmap_info(dev, ip, s, wq);
/*
* Return the offset to mmap.
- * See hfi1_mmap() for details.
+ * See rvt_mmap() for details.
*/
if (udata && udata->inlen >= sizeof(__u64)) {
ret = ib_copy_to_udata(udata, &ip->offset,
sizeof(ip->offset));
if (ret)
- goto bail;
+ return ret;
}
/*
@@ -355,19 +304,24 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
}
- goto bail;
+ return ret;
bail_unlock:
spin_unlock_irq(&srq->rq.lock);
bail_free:
vfree(wq);
-bail:
return ret;
}
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+/** rvt_query_srq - query srq data
+ * @ibsrq: srq to query
+ * @attr: return info in attr
+ *
+ * Return: always 0
+ */
+int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
- struct hfi1_srq *srq = to_isrq(ibsrq);
+ struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
attr->max_wr = srq->rq.size - 1;
attr->max_sge = srq->rq.max_sge;
@@ -376,19 +330,21 @@ int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
}
/**
- * hfi1_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
+ * rvt_destroy_srq - destory an srq
+ * @ibsrq: srq object to destroy
+ *
+ * Return always 0
*/
-int hfi1_destroy_srq(struct ib_srq *ibsrq)
+int rvt_destroy_srq(struct ib_srq *ibsrq)
{
- struct hfi1_srq *srq = to_isrq(ibsrq);
- struct hfi1_ibdev *dev = to_idev(ibsrq->device);
+ struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+ struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
spin_unlock(&dev->n_srqs_lock);
if (srq->ip)
- kref_put(&srq->ip->ref, hfi1_release_mmap_info);
+ kref_put(&srq->ip->ref, rvt_release_mmap_info);
else
vfree(srq->rq.wq);
kfree(srq);
diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h
new file mode 100644
index 000000000000..bf0eaaf56465
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/srq.h
@@ -0,0 +1,62 @@
+#ifndef DEF_RVTSRQ_H
+#define DEF_RVTSRQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+void rvt_driver_srq_init(struct rvt_dev_info *rdi);
+struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
+int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+int rvt_destroy_srq(struct ib_srq *ibsrq);
+
+#endif /* DEF_RVTSRQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c
new file mode 100644
index 000000000000..d593285a349c
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
new file mode 100644
index 000000000000..6c0457db5499
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR rdmavt
+
+#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RDMAVT_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi))
+#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi))
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdmavt
+
+TRACE_EVENT(rvt_dbg,
+ TP_PROTO(struct rvt_dev_info *rdi,
+ const char *msg),
+ TP_ARGS(rdi, msg),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(rdi)
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(rdi);
+ __assign_str(msg, msg);
+ ),
+ TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qphash
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, bucket)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->bucket = bucket;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x bucket %u",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->bucket
+ )
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
+#define show_wr_opcode(opcode) \
+__print_symbolic(opcode, \
+ wr_opcode_name(RDMA_WRITE), \
+ wr_opcode_name(RDMA_WRITE_WITH_IMM), \
+ wr_opcode_name(SEND), \
+ wr_opcode_name(SEND_WITH_IMM), \
+ wr_opcode_name(RDMA_READ), \
+ wr_opcode_name(ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(LSO), \
+ wr_opcode_name(SEND_WITH_INV), \
+ wr_opcode_name(RDMA_READ_WITH_INV), \
+ wr_opcode_name(LOCAL_INV), \
+ wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+ rvt_post_one_wr,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+ TP_ARGS(qp, wqe),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, psn)
+ __field(u32, lpsn)
+ __field(u32, length)
+ __field(u32, opcode)
+ __field(u32, size)
+ __field(u32, avail)
+ __field(u32, head)
+ __field(u32, last)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->psn = wqe->psn;
+ __entry->lpsn = wqe->lpsn;
+ __entry->length = wqe->length;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->size = qp->s_size;
+ __entry->avail = qp->s_avail;
+ __entry->head = qp->s_head;
+ __entry->last = qp->s_last;
+ ),
+ TP_printk(
+ POS_PRN,
+ __get_str(dev),
+ __entry->wr_id,
+ __entry->qpn,
+ __entry->psn,
+ __entry->lpsn,
+ __entry->length,
+ __entry->opcode, show_wr_opcode(__entry->opcode),
+ __entry->size,
+ __entry->avail,
+ __entry->head,
+ __entry->last
+ )
+);
+
+#endif /* __RDMAVT_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
new file mode 100644
index 000000000000..6caf5272ba1f
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -0,0 +1,873 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "vt.h"
+#include "trace.h"
+
+#define RVT_UVERBS_ABI_VERSION 2
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("RDMA Verbs Transport Library");
+
+static int rvt_init(void)
+{
+ /*
+ * rdmavt does not need to do anything special when it starts up. All it
+ * needs to do is sit and wait until a driver attempts registration.
+ */
+ return 0;
+}
+module_init(rvt_init);
+
+static void rvt_cleanup(void)
+{
+ /*
+ * Nothing to do at exit time either. The module won't be able to be
+ * removed until all drivers are gone which means all the dev structs
+ * are gone so there is really nothing to do.
+ */
+}
+module_exit(rvt_cleanup);
+
+/**
+ * rvt_alloc_device - allocate rdi
+ * @size: how big of a structure to allocate
+ * @nports: number of ports to allocate array slots for
+ *
+ * Use IB core device alloc to allocate space for the rdi which is assumed to be
+ * inside of the ib_device. Any extra space that drivers require should be
+ * included in size.
+ *
+ * We also allocate a port array based on the number of ports.
+ *
+ * Return: pointer to allocated rdi
+ */
+struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
+{
+ struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM);
+
+ rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+ if (!rdi)
+ return rdi;
+
+ rdi->ports = kcalloc(nports,
+ sizeof(struct rvt_ibport **),
+ GFP_KERNEL);
+ if (!rdi->ports)
+ ib_dealloc_device(&rdi->ibdev);
+
+ return rdi;
+}
+EXPORT_SYMBOL(rvt_alloc_device);
+
+static int rvt_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+
+ if (uhw->inlen || uhw->outlen)
+ return -EINVAL;
+ /*
+ * Return rvt_dev_info.dparms.props contents
+ */
+ *props = rdi->dparms.props;
+ return 0;
+}
+
+static int rvt_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ /*
+ * There is currently no need to supply this based on qib and hfi1.
+ * Future drivers may need to implement this though.
+ */
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * rvt_query_port: Passes the query port call to the driver
+ * @ibdev: Verbs IB dev
+ * @port_num: port number, 1 based from ib core
+ * @props: structure to hold returned properties
+ *
+ * Return: 0 on success
+ */
+static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_attr *props)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ struct rvt_ibport *rvp;
+ int port_index = ibport_num_to_idx(ibdev, port_num);
+
+ if (port_index < 0)
+ return -EINVAL;
+
+ rvp = rdi->ports[port_index];
+ memset(props, 0, sizeof(*props));
+ props->sm_lid = rvp->sm_lid;
+ props->sm_sl = rvp->sm_sl;
+ props->port_cap_flags = rvp->port_cap_flags;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = rvt_get_npkeys(rdi);
+ props->bad_pkey_cntr = rvp->pkey_violations;
+ props->qkey_viol_cntr = rvp->qkey_violations;
+ props->subnet_timeout = rvp->subnet_timeout;
+ props->init_type_reply = 0;
+
+ /* Populate the remaining ib_port_attr elements */
+ return rdi->driver_f.query_port_state(rdi, port_num, props);
+}
+
+/**
+ * rvt_modify_port
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @port_modify_mask: How to change the port
+ * @props: Structure to fill in
+ *
+ * Return: 0 on success
+ */
+static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
+ int port_modify_mask, struct ib_port_modify *props)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ struct rvt_ibport *rvp;
+ int ret = 0;
+ int port_index = ibport_num_to_idx(ibdev, port_num);
+
+ if (port_index < 0)
+ return -EINVAL;
+
+ rvp = rdi->ports[port_index];
+ rvp->port_cap_flags |= props->set_port_cap_mask;
+ rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+
+ if (props->set_port_cap_mask || props->clr_port_cap_mask)
+ rdi->driver_f.cap_mask_chg(rdi, port_num);
+ if (port_modify_mask & IB_PORT_SHUTDOWN)
+ ret = rdi->driver_f.shut_down_port(rdi, port_num);
+ if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ rvp->qkey_violations = 0;
+
+ return ret;
+}
+
+/**
+ * rvt_query_pkey - Return a pkey from the table at a given index
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @intex: Index into pkey table
+ *
+ * Return: 0 on failure pkey otherwise
+ */
+static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index,
+ u16 *pkey)
+{
+ /*
+ * Driver will be responsible for keeping rvt_dev_info.pkey_table up to
+ * date. This function will just return that value. There is no need to
+ * lock, if a stale value is read and sent to the user so be it there is
+ * no way to protect against that anyway.
+ */
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ int port_index;
+
+ port_index = ibport_num_to_idx(ibdev, port_num);
+ if (port_index < 0)
+ return -EINVAL;
+
+ if (index >= rvt_get_npkeys(rdi))
+ return -EINVAL;
+
+ *pkey = rvt_get_pkey(rdi, port_index, index);
+ return 0;
+}
+
+/**
+ * rvt_query_gid - Return a gid from the table
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @index: = Index in table
+ * @gid: Gid to return
+ *
+ * Return: 0 on success
+ */
+static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
+ int guid_index, union ib_gid *gid)
+{
+ struct rvt_dev_info *rdi;
+ struct rvt_ibport *rvp;
+ int port_index;
+
+ /*
+ * Driver is responsible for updating the guid table. Which will be used
+ * to craft the return value. This will work similar to how query_pkey()
+ * is being done.
+ */
+ port_index = ibport_num_to_idx(ibdev, port_num);
+ if (port_index < 0)
+ return -EINVAL;
+
+ rdi = ib_to_rvt(ibdev);
+ rvp = rdi->ports[port_index];
+
+ gid->global.subnet_prefix = rvp->gid_prefix;
+
+ return rdi->driver_f.get_guid_be(rdi, rvp, guid_index,
+ &gid->global.interface_id);
+}
+
+struct rvt_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct rvt_ucontext, ibucontext);
+}
+
+/**
+ * rvt_alloc_ucontext - Allocate a user context
+ * @ibdev: Vers IB dev
+ * @data: User data allocated
+ */
+static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct rvt_ucontext *context;
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+ return &context->ibucontext;
+}
+
+/**
+ *rvt_dealloc_ucontext - Free a user context
+ *@context - Free this
+ */
+static int rvt_dealloc_ucontext(struct ib_ucontext *context)
+{
+ kfree(to_iucontext(context));
+ return 0;
+}
+
+static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ struct ib_port_attr attr;
+ int err, port_index;
+
+ port_index = ibport_num_to_idx(ibdev, port_num);
+ if (port_index < 0)
+ return -EINVAL;
+
+ err = rvt_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+ immutable->max_mad_size = rdi->dparms.max_mad_size;
+
+ return 0;
+}
+
+enum {
+ MISC,
+ QUERY_DEVICE,
+ MODIFY_DEVICE,
+ QUERY_PORT,
+ MODIFY_PORT,
+ QUERY_PKEY,
+ QUERY_GID,
+ ALLOC_UCONTEXT,
+ DEALLOC_UCONTEXT,
+ GET_PORT_IMMUTABLE,
+ CREATE_QP,
+ MODIFY_QP,
+ DESTROY_QP,
+ QUERY_QP,
+ POST_SEND,
+ POST_RECV,
+ POST_SRQ_RECV,
+ CREATE_AH,
+ DESTROY_AH,
+ MODIFY_AH,
+ QUERY_AH,
+ CREATE_SRQ,
+ MODIFY_SRQ,
+ DESTROY_SRQ,
+ QUERY_SRQ,
+ ATTACH_MCAST,
+ DETACH_MCAST,
+ GET_DMA_MR,
+ REG_USER_MR,
+ DEREG_MR,
+ ALLOC_MR,
+ ALLOC_FMR,
+ MAP_PHYS_FMR,
+ UNMAP_FMR,
+ DEALLOC_FMR,
+ MMAP,
+ CREATE_CQ,
+ DESTROY_CQ,
+ POLL_CQ,
+ REQ_NOTFIY_CQ,
+ RESIZE_CQ,
+ ALLOC_PD,
+ DEALLOC_PD,
+ _VERB_IDX_MAX /* Must always be last! */
+};
+
+static inline int check_driver_override(struct rvt_dev_info *rdi,
+ size_t offset, void *func)
+{
+ if (!*(void **)((void *)&rdi->ibdev + offset)) {
+ *(void **)((void *)&rdi->ibdev + offset) = func;
+ return 0;
+ }
+
+ return 1;
+}
+
+static noinline int check_support(struct rvt_dev_info *rdi, int verb)
+{
+ switch (verb) {
+ case MISC:
+ /*
+ * These functions are not part of verbs specifically but are
+ * required for rdmavt to function.
+ */
+ if ((!rdi->driver_f.port_callback) ||
+ (!rdi->driver_f.get_card_name) ||
+ (!rdi->driver_f.get_pci_dev))
+ return -EINVAL;
+ break;
+
+ case QUERY_DEVICE:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ query_device),
+ rvt_query_device);
+ break;
+
+ case MODIFY_DEVICE:
+ /*
+ * rdmavt does not support modify device currently drivers must
+ * provide.
+ */
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ modify_device),
+ rvt_modify_device))
+ return -EOPNOTSUPP;
+ break;
+
+ case QUERY_PORT:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ query_port),
+ rvt_query_port))
+ if (!rdi->driver_f.query_port_state)
+ return -EINVAL;
+ break;
+
+ case MODIFY_PORT:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ modify_port),
+ rvt_modify_port))
+ if (!rdi->driver_f.cap_mask_chg ||
+ !rdi->driver_f.shut_down_port)
+ return -EINVAL;
+ break;
+
+ case QUERY_PKEY:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ query_pkey),
+ rvt_query_pkey);
+ break;
+
+ case QUERY_GID:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ query_gid),
+ rvt_query_gid))
+ if (!rdi->driver_f.get_guid_be)
+ return -EINVAL;
+ break;
+
+ case ALLOC_UCONTEXT:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ alloc_ucontext),
+ rvt_alloc_ucontext);
+ break;
+
+ case DEALLOC_UCONTEXT:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ dealloc_ucontext),
+ rvt_dealloc_ucontext);
+ break;
+
+ case GET_PORT_IMMUTABLE:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ get_port_immutable),
+ rvt_get_port_immutable);
+ break;
+
+ case CREATE_QP:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ create_qp),
+ rvt_create_qp))
+ if (!rdi->driver_f.qp_priv_alloc ||
+ !rdi->driver_f.qp_priv_free ||
+ !rdi->driver_f.notify_qp_reset ||
+ !rdi->driver_f.flush_qp_waiters ||
+ !rdi->driver_f.stop_send_queue ||
+ !rdi->driver_f.quiesce_qp)
+ return -EINVAL;
+ break;
+
+ case MODIFY_QP:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ modify_qp),
+ rvt_modify_qp))
+ if (!rdi->driver_f.notify_qp_reset ||
+ !rdi->driver_f.schedule_send ||
+ !rdi->driver_f.get_pmtu_from_attr ||
+ !rdi->driver_f.flush_qp_waiters ||
+ !rdi->driver_f.stop_send_queue ||
+ !rdi->driver_f.quiesce_qp ||
+ !rdi->driver_f.notify_error_qp ||
+ !rdi->driver_f.mtu_from_qp ||
+ !rdi->driver_f.mtu_to_path_mtu ||
+ !rdi->driver_f.shut_down_port ||
+ !rdi->driver_f.cap_mask_chg)
+ return -EINVAL;
+ break;
+
+ case DESTROY_QP:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ destroy_qp),
+ rvt_destroy_qp))
+ if (!rdi->driver_f.qp_priv_free ||
+ !rdi->driver_f.notify_qp_reset ||
+ !rdi->driver_f.flush_qp_waiters ||
+ !rdi->driver_f.stop_send_queue ||
+ !rdi->driver_f.quiesce_qp)
+ return -EINVAL;
+ break;
+
+ case QUERY_QP:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ query_qp),
+ rvt_query_qp);
+ break;
+
+ case POST_SEND:
+ if (!check_driver_override(rdi, offsetof(struct ib_device,
+ post_send),
+ rvt_post_send))
+ if (!rdi->driver_f.schedule_send ||
+ !rdi->driver_f.do_send)
+ return -EINVAL;
+ break;
+
+ case POST_RECV:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ post_recv),
+ rvt_post_recv);
+ break;
+ case POST_SRQ_RECV:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ post_srq_recv),
+ rvt_post_srq_recv);
+ break;
+
+ case CREATE_AH:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ create_ah),
+ rvt_create_ah);
+ break;
+
+ case DESTROY_AH:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ destroy_ah),
+ rvt_destroy_ah);
+ break;
+
+ case MODIFY_AH:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ modify_ah),
+ rvt_modify_ah);
+ break;
+
+ case QUERY_AH:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ query_ah),
+ rvt_query_ah);
+ break;
+
+ case CREATE_SRQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ create_srq),
+ rvt_create_srq);
+ break;
+
+ case MODIFY_SRQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ modify_srq),
+ rvt_modify_srq);
+ break;
+
+ case DESTROY_SRQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ destroy_srq),
+ rvt_destroy_srq);
+ break;
+
+ case QUERY_SRQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ query_srq),
+ rvt_query_srq);
+ break;
+
+ case ATTACH_MCAST:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ attach_mcast),
+ rvt_attach_mcast);
+ break;
+
+ case DETACH_MCAST:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ detach_mcast),
+ rvt_detach_mcast);
+ break;
+
+ case GET_DMA_MR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ get_dma_mr),
+ rvt_get_dma_mr);
+ break;
+
+ case REG_USER_MR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ reg_user_mr),
+ rvt_reg_user_mr);
+ break;
+
+ case DEREG_MR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ dereg_mr),
+ rvt_dereg_mr);
+ break;
+
+ case ALLOC_FMR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ alloc_fmr),
+ rvt_alloc_fmr);
+ break;
+
+ case ALLOC_MR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ alloc_mr),
+ rvt_alloc_mr);
+ break;
+
+ case MAP_PHYS_FMR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ map_phys_fmr),
+ rvt_map_phys_fmr);
+ break;
+
+ case UNMAP_FMR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ unmap_fmr),
+ rvt_unmap_fmr);
+ break;
+
+ case DEALLOC_FMR:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ dealloc_fmr),
+ rvt_dealloc_fmr);
+ break;
+
+ case MMAP:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ mmap),
+ rvt_mmap);
+ break;
+
+ case CREATE_CQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ create_cq),
+ rvt_create_cq);
+ break;
+
+ case DESTROY_CQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ destroy_cq),
+ rvt_destroy_cq);
+ break;
+
+ case POLL_CQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ poll_cq),
+ rvt_poll_cq);
+ break;
+
+ case REQ_NOTFIY_CQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ req_notify_cq),
+ rvt_req_notify_cq);
+ break;
+
+ case RESIZE_CQ:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ resize_cq),
+ rvt_resize_cq);
+ break;
+
+ case ALLOC_PD:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ alloc_pd),
+ rvt_alloc_pd);
+ break;
+
+ case DEALLOC_PD:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ dealloc_pd),
+ rvt_dealloc_pd);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * rvt_register_device - register a driver
+ * @rdi: main dev structure for all of rdmavt operations
+ *
+ * It is up to drivers to allocate the rdi and fill in the appropriate
+ * information.
+ *
+ * Return: 0 on success otherwise an errno.
+ */
+int rvt_register_device(struct rvt_dev_info *rdi)
+{
+ int ret = 0, i;
+
+ if (!rdi)
+ return -EINVAL;
+
+ /*
+ * Check to ensure drivers have setup the required helpers for the verbs
+ * they want rdmavt to handle
+ */
+ for (i = 0; i < _VERB_IDX_MAX; i++)
+ if (check_support(rdi, i)) {
+ pr_err("Driver support req not met at %d\n", i);
+ return -EINVAL;
+ }
+
+
+ /* Once we get past here we can use rvt_pr macros and tracepoints */
+ trace_rvt_dbg(rdi, "Driver attempting registration");
+ rvt_mmap_init(rdi);
+
+ /* Queue Pairs */
+ ret = rvt_driver_qp_init(rdi);
+ if (ret) {
+ pr_err("Error in driver QP init.\n");
+ return -EINVAL;
+ }
+
+ /* Address Handle */
+ spin_lock_init(&rdi->n_ahs_lock);
+ rdi->n_ahs_allocated = 0;
+
+ /* Shared Receive Queue */
+ rvt_driver_srq_init(rdi);
+
+ /* Multicast */
+ rvt_driver_mcast_init(rdi);
+
+ /* Mem Region */
+ ret = rvt_driver_mr_init(rdi);
+ if (ret) {
+ pr_err("Error in driver MR init.\n");
+ goto bail_no_mr;
+ }
+
+ /* Completion queues */
+ ret = rvt_driver_cq_init(rdi);
+ if (ret) {
+ pr_err("Error in driver CQ init.\n");
+ goto bail_mr;
+ }
+
+ /* DMA Operations */
+ rdi->ibdev.dma_ops =
+ rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+
+ /* Protection Domain */
+ spin_lock_init(&rdi->n_pds_lock);
+ rdi->n_pds_allocated = 0;
+
+ /*
+ * There are some things which could be set by underlying drivers but
+ * really should be up to rdmavt to set. For instance drivers can't know
+ * exactly which functions rdmavt supports, nor do they know the ABI
+ * version, so we do all of this sort of stuff here.
+ */
+ rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION;
+ rdi->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ rdi->ibdev.node_type = RDMA_NODE_IB_CA;
+ rdi->ibdev.num_comp_vectors = 1;
+
+ /* We are now good to announce we exist */
+ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
+ if (ret) {
+ rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
+ goto bail_cq;
+ }
+
+ rvt_create_mad_agents(rdi);
+
+ rvt_pr_info(rdi, "Registration with rdmavt done.\n");
+ return ret;
+
+bail_cq:
+ rvt_cq_exit(rdi);
+
+bail_mr:
+ rvt_mr_exit(rdi);
+
+bail_no_mr:
+ rvt_qp_exit(rdi);
+
+ return ret;
+}
+EXPORT_SYMBOL(rvt_register_device);
+
+/**
+ * rvt_unregister_device - remove a driver
+ * @rdi: rvt dev struct
+ */
+void rvt_unregister_device(struct rvt_dev_info *rdi)
+{
+ trace_rvt_dbg(rdi, "Driver is unregistering.");
+ if (!rdi)
+ return;
+
+ rvt_free_mad_agents(rdi);
+
+ ib_unregister_device(&rdi->ibdev);
+ rvt_cq_exit(rdi);
+ rvt_mr_exit(rdi);
+ rvt_qp_exit(rdi);
+}
+EXPORT_SYMBOL(rvt_unregister_device);
+
+/**
+ * rvt_init_port - init internal data for driver port
+ * @rdi: rvt dev strut
+ * @port: rvt port
+ * @port_index: 0 based index of ports, different from IB core port num
+ *
+ * Keep track of a list of ports. No need to have a detach port.
+ * They persist until the driver goes away.
+ *
+ * Return: always 0
+ */
+int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
+ int port_index, u16 *pkey_table)
+{
+
+ rdi->ports[port_index] = port;
+ rdi->ports[port_index]->pkey_table = pkey_table;
+
+ return 0;
+}
+EXPORT_SYMBOL(rvt_init_port);
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
new file mode 100644
index 000000000000..6b01eaa4461b
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -0,0 +1,104 @@
+#ifndef DEF_RDMAVT_H
+#define DEF_RDMAVT_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <linux/pci.h>
+#include "dma.h"
+#include "pd.h"
+#include "qp.h"
+#include "ah.h"
+#include "mr.h"
+#include "srq.h"
+#include "mcast.h"
+#include "mmap.h"
+#include "cq.h"
+#include "mad.h"
+#include "mmap.h"
+
+#define rvt_pr_info(rdi, fmt, ...) \
+ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \
+ rdi->driver_f.get_card_name(rdi), \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define rvt_pr_warn(rdi, fmt, ...) \
+ __rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \
+ rdi->driver_f.get_card_name(rdi), \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define rvt_pr_err(rdi, fmt, ...) \
+ __rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \
+ rdi->driver_f.get_card_name(rdi), \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define __rvt_pr_info(pdev, name, fmt, ...) \
+ dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+#define __rvt_pr_warn(pdev, name, fmt, ...) \
+ dev_warn(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+#define __rvt_pr_err(pdev, name, fmt, ...) \
+ dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ int port_index;
+
+ port_index = port_num - 1; /* IB ports start at 1 our arrays at 0 */
+ if ((port_index < 0) || (port_index >= rdi->dparms.nports))
+ return -EINVAL;
+
+ return port_index;
+}
+
+#endif /* DEF_RDMAVT_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 85be0de3ab26..caec8e9c4666 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -388,7 +388,7 @@ struct ipoib_dev_priv {
struct dentry *mcg_dentry;
struct dentry *path_dentry;
#endif
- int hca_caps;
+ u64 hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
unsigned max_send_sge;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 899e6b7fb8a5..f0e55e47eb54 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -180,6 +180,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
union ib_gid *dgid;
+ union ib_gid *sgid;
ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -203,13 +204,6 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
return;
}
- /*
- * Drop packets that this interface sent, ie multicast packets
- * that the HCA has replicated.
- */
- if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
- goto repost;
-
memcpy(mapping, priv->rx_ring[wr_id].mapping,
IPOIB_UD_RX_SG * sizeof *mapping);
@@ -239,6 +233,25 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
else
skb->pkt_type = PACKET_MULTICAST;
+ sgid = &((struct ib_grh *)skb->data)->sgid;
+
+ /*
+ * Drop packets that this interface sent, ie multicast packets
+ * that the HCA has replicated.
+ */
+ if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) {
+ int need_repost = 1;
+
+ if ((wc->wc_flags & IB_WC_GRH) &&
+ sgid->global.interface_id != priv->local_gid.global.interface_id)
+ need_repost = 0;
+
+ if (need_repost) {
+ dev_kfree_skb_any(skb);
+ goto repost;
+ }
+ }
+
skb_pull(skb, IB_GRH_BYTES);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 25509bbd4a05..80807d6e5c4c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -51,6 +51,7 @@
#include <net/addrconf.h>
#include <linux/inetdevice.h>
#include <rdma/ib_cache.h>
+#include <linux/pci.h>
#define DRV_VERSION "1.0.0"
@@ -1590,11 +1591,67 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->tx_ring = NULL;
}
+static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
+}
+
+static int ipoib_get_vf_config(struct net_device *dev, int vf,
+ struct ifla_vf_info *ivf)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int err;
+
+ err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
+ if (err)
+ return err;
+
+ ivf->vf = vf;
+
+ return 0;
+}
+
+static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
+ return -EINVAL;
+
+ return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
+}
+
+static int ipoib_get_vf_stats(struct net_device *dev, int vf,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
+}
+
static const struct header_ops ipoib_header_ops = {
.create = ipoib_hard_header,
};
-static const struct net_device_ops ipoib_netdev_ops = {
+static const struct net_device_ops ipoib_netdev_ops_pf = {
+ .ndo_uninit = ipoib_uninit,
+ .ndo_open = ipoib_open,
+ .ndo_stop = ipoib_stop,
+ .ndo_change_mtu = ipoib_change_mtu,
+ .ndo_fix_features = ipoib_fix_features,
+ .ndo_start_xmit = ipoib_start_xmit,
+ .ndo_tx_timeout = ipoib_timeout,
+ .ndo_set_rx_mode = ipoib_set_mcast_list,
+ .ndo_get_iflink = ipoib_get_iflink,
+ .ndo_set_vf_link_state = ipoib_set_vf_link_state,
+ .ndo_get_vf_config = ipoib_get_vf_config,
+ .ndo_get_vf_stats = ipoib_get_vf_stats,
+ .ndo_set_vf_guid = ipoib_set_vf_guid,
+};
+
+static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_uninit = ipoib_uninit,
.ndo_open = ipoib_open,
.ndo_stop = ipoib_stop,
@@ -1610,7 +1667,11 @@ void ipoib_setup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- dev->netdev_ops = &ipoib_netdev_ops;
+ if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+ dev->netdev_ops = &ipoib_netdev_ops_vf;
+ else
+ dev->netdev_ops = &ipoib_netdev_ops_pf;
+
dev->header_ops = &ipoib_header_ops;
ipoib_set_ethtool_ops(dev);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1d1309091aba..0bd3cb2f3c67 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -839,7 +839,7 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
if (srpt_set_ch_state(ch, CH_DISCONNECTED))
schedule_work(&ch->release_work);
else
- WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
+ WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num);
}
}
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index b4729ba57c9c..3b3c63e54ed6 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -41,6 +41,7 @@ i40e-objs := i40e_main.o \
i40e_diag.o \
i40e_txrx.o \
i40e_ptp.o \
+ i40e_client.o \
i40e_virtchnl_pf.o
i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2f6210ae8ba0..1ce6e9c0427d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -58,6 +58,7 @@
#ifdef I40E_FCOE
#include "i40e_fcoe.h"
#endif
+#include "i40e_client.h"
#include "i40e_virtchnl.h"
#include "i40e_virtchnl_pf.h"
#include "i40e_txrx.h"
@@ -190,6 +191,7 @@ struct i40e_lump_tracking {
u16 search_hint;
u16 list[0];
#define I40E_PILE_VALID_BIT 0x8000
+#define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2)
};
#define I40E_DEFAULT_ATR_SAMPLE_RATE 20
@@ -282,6 +284,8 @@ struct i40e_pf {
#endif /* I40E_FCOE */
u16 num_lan_qps; /* num lan queues this PF has set up */
u16 num_lan_msix; /* num queue vectors for the base PF vsi */
+ u16 num_iwarp_msix; /* num of iwarp vectors for this PF */
+ int iwarp_base_vector;
int queues_left; /* queues left unclaimed */
u16 alloc_rss_size; /* allocated RSS queues */
u16 rss_size_max; /* HW defined max RSS queues */
@@ -329,6 +333,7 @@ struct i40e_pf {
#define I40E_FLAG_16BYTE_RX_DESC_ENABLED BIT_ULL(13)
#define I40E_FLAG_CLEAN_ADMINQ BIT_ULL(14)
#define I40E_FLAG_FILTER_SYNC BIT_ULL(15)
+#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16)
#define I40E_FLAG_PROCESS_MDD_EVENT BIT_ULL(17)
#define I40E_FLAG_PROCESS_VFLR_EVENT BIT_ULL(18)
#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19)
@@ -571,6 +576,8 @@ struct i40e_vsi {
struct kobject *kobj; /* sysfs object */
bool current_isup; /* Sync 'link up' logging */
+ void *priv; /* client driver data reference. */
+
/* VSI specific handlers */
irqreturn_t (*irq_handler)(int irq, void *data);
@@ -728,6 +735,10 @@ void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
struct i40e_vsi_context *ctxt,
u8 enabled_tc, bool is_add);
#endif
+void i40e_service_event_schedule(struct i40e_pf *pf);
+void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
+ u8 *msg, u16 len);
+
int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable);
int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
@@ -750,6 +761,17 @@ static inline void i40e_dbg_pf_exit(struct i40e_pf *pf) {}
static inline void i40e_dbg_init(void) {}
static inline void i40e_dbg_exit(void) {}
#endif /* CONFIG_DEBUG_FS*/
+/* needed by client drivers */
+int i40e_lan_add_device(struct i40e_pf *pf);
+int i40e_lan_del_device(struct i40e_pf *pf);
+void i40e_client_subtask(struct i40e_pf *pf);
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+ enum i40e_client_type type);
/**
* i40e_irq_dynamic_enable - Enable default interrupt generation settings
* @vsi: pointer to a vsi
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
new file mode 100644
index 000000000000..0e6ac841321c
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -0,0 +1,1012 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "i40e.h"
+#include "i40e_prototype.h"
+#include "i40e_client.h"
+
+static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
+
+static LIST_HEAD(i40e_devices);
+static DEFINE_MUTEX(i40e_device_mutex);
+
+static LIST_HEAD(i40e_clients);
+static DEFINE_MUTEX(i40e_client_mutex);
+
+static LIST_HEAD(i40e_client_instances);
+static DEFINE_MUTEX(i40e_client_instance_mutex);
+
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 vf_id, u8 *msg, u16 len);
+
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+ struct i40e_client *client,
+ struct i40e_qvlist_info *qvlist_info);
+
+static void i40e_client_request_reset(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 reset_level);
+
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+ struct i40e_client *client,
+ bool is_vf, u32 vf_id,
+ u32 flag, u32 valid_flag);
+
+static struct i40e_ops i40e_lan_ops = {
+ .virtchnl_send = i40e_client_virtchnl_send,
+ .setup_qvlist = i40e_client_setup_qvlist,
+ .request_reset = i40e_client_request_reset,
+ .update_vsi_ctxt = i40e_client_update_vsi_ctxt,
+};
+
+/**
+ * i40e_client_type_to_vsi_type - convert client type to vsi type
+ * @client_type: the i40e_client type
+ *
+ * returns the related vsi type value
+ **/
+static
+enum i40e_vsi_type i40e_client_type_to_vsi_type(enum i40e_client_type type)
+{
+ switch (type) {
+ case I40E_CLIENT_IWARP:
+ return I40E_VSI_IWARP;
+
+ case I40E_CLIENT_VMDQ2:
+ return I40E_VSI_VMDQ2;
+
+ default:
+ pr_err("i40e: Client type unknown\n");
+ return I40E_VSI_TYPE_UNKNOWN;
+ }
+}
+
+/**
+ * i40e_client_get_params - Get the params that can change at runtime
+ * @vsi: the VSI with the message
+ * @param: clinet param struct
+ *
+ **/
+static
+int i40e_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params)
+{
+ struct i40e_dcbx_config *dcb_cfg = &vsi->back->hw.local_dcbx_config;
+ int i = 0;
+
+ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+ u8 tc = dcb_cfg->etscfg.prioritytable[i];
+ u16 qs_handle;
+
+ /* If TC is not enabled for VSI use TC0 for UP */
+ if (!(vsi->tc_config.enabled_tc & BIT(tc)))
+ tc = 0;
+
+ qs_handle = le16_to_cpu(vsi->info.qs_handle[tc]);
+ params->qos.prio_qos[i].tc = tc;
+ params->qos.prio_qos[i].qs_handle = qs_handle;
+ if (qs_handle == I40E_AQ_VSI_QS_HANDLE_INVALID) {
+ dev_err(&vsi->back->pdev->dev, "Invalid queue set handle for TC = %d, vsi id = %d\n",
+ tc, vsi->id);
+ return -EINVAL;
+ }
+ }
+
+ params->mtu = vsi->netdev->mtu;
+ return 0;
+}
+
+/**
+ * i40e_notify_client_of_vf_msg - call the client vf message callback
+ * @vsi: the VSI with the message
+ * @vf_id: the absolute VF id that sent the message
+ * @msg: message buffer
+ * @len: length of the message
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void
+i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
+{
+ struct i40e_client_instance *cdev;
+
+ if (!vsi)
+ return;
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.pf == vsi->back) {
+ if (!cdev->client ||
+ !cdev->client->ops ||
+ !cdev->client->ops->virtchnl_receive) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Cannot locate client instance virtual channel receive routine\n");
+ continue;
+ }
+ cdev->client->ops->virtchnl_receive(&cdev->lan_info,
+ cdev->client,
+ vf_id, msg, len);
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_l2_param_changes - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
+{
+ struct i40e_client_instance *cdev;
+ struct i40e_params params;
+
+ if (!vsi)
+ return;
+ memset(&params, 0, sizeof(params));
+ i40e_client_get_params(vsi, &params);
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.pf == vsi->back) {
+ if (!cdev->client ||
+ !cdev->client->ops ||
+ !cdev->client->ops->l2_param_change) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Cannot locate client instance l2_param_change routine\n");
+ continue;
+ }
+ cdev->lan_info.params = params;
+ cdev->client->ops->l2_param_change(&cdev->lan_info,
+ cdev->client,
+ &params);
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_netdev_open - call the client open callback
+ * @vsi: the VSI with netdev opened
+ *
+ * If there is a client to this netdev, call the client with open
+ **/
+void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
+{
+ struct i40e_client_instance *cdev;
+
+ if (!vsi)
+ return;
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.netdev == vsi->netdev) {
+ if (!cdev->client ||
+ !cdev->client->ops || !cdev->client->ops->open) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Cannot locate client instance open routine\n");
+ continue;
+ }
+ cdev->client->ops->open(&cdev->lan_info, cdev->client);
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_client_release_qvlist
+ * @ldev: pointer to L2 context.
+ *
+ **/
+static void i40e_client_release_qvlist(struct i40e_info *ldev)
+{
+ struct i40e_qvlist_info *qvlist_info = ldev->qvlist_info;
+ u32 i;
+
+ if (!ldev->qvlist_info)
+ return;
+
+ for (i = 0; i < qvlist_info->num_vectors; i++) {
+ struct i40e_pf *pf = ldev->pf;
+ struct i40e_qv_info *qv_info;
+ u32 reg_idx;
+
+ qv_info = &qvlist_info->qv_info[i];
+ if (!qv_info)
+ continue;
+ reg_idx = I40E_PFINT_LNKLSTN(qv_info->v_idx - 1);
+ wr32(&pf->hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+ }
+ kfree(ldev->qvlist_info);
+ ldev->qvlist_info = NULL;
+}
+
+/**
+ * i40e_notify_client_of_netdev_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to a reset pending
+ *
+ * If there is a client to this netdev, call the client with close
+ **/
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
+{
+ struct i40e_client_instance *cdev;
+
+ if (!vsi)
+ return;
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.netdev == vsi->netdev) {
+ if (!cdev->client ||
+ !cdev->client->ops || !cdev->client->ops->close) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Cannot locate client instance close routine\n");
+ continue;
+ }
+ cdev->client->ops->close(&cdev->lan_info, cdev->client,
+ reset);
+ i40e_client_release_qvlist(&cdev->lan_info);
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_vf_reset - call the client vf reset callback
+ * @pf: PF device pointer
+ * @vf_id: asolute id of VF being reset
+ *
+ * If there is a client attached to this PF, notify when a VF is reset
+ **/
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
+{
+ struct i40e_client_instance *cdev;
+
+ if (!pf)
+ return;
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.pf == pf) {
+ if (!cdev->client ||
+ !cdev->client->ops ||
+ !cdev->client->ops->vf_reset) {
+ dev_dbg(&pf->pdev->dev,
+ "Cannot locate client instance VF reset routine\n");
+ continue;
+ }
+ cdev->client->ops->vf_reset(&cdev->lan_info,
+ cdev->client, vf_id);
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_vf_enable - call the client vf notification callback
+ * @pf: PF device pointer
+ * @num_vfs: the number of VFs currently enabled, 0 for disable
+ *
+ * If there is a client attached to this PF, call its VF notification routine
+ **/
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
+{
+ struct i40e_client_instance *cdev;
+
+ if (!pf)
+ return;
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.pf == pf) {
+ if (!cdev->client ||
+ !cdev->client->ops ||
+ !cdev->client->ops->vf_enable) {
+ dev_dbg(&pf->pdev->dev,
+ "Cannot locate client instance VF enable routine\n");
+ continue;
+ }
+ cdev->client->ops->vf_enable(&cdev->lan_info,
+ cdev->client, num_vfs);
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_vf_client_capable - ask the client if it likes the specified VF
+ * @pf: PF device pointer
+ * @vf_id: the VF in question
+ *
+ * If there is a client of the specified type attached to this PF, call
+ * its vf_capable routine
+ **/
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+ enum i40e_client_type type)
+{
+ struct i40e_client_instance *cdev;
+ int capable = false;
+
+ if (!pf)
+ return false;
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if (cdev->lan_info.pf == pf) {
+ if (!cdev->client ||
+ !cdev->client->ops ||
+ !cdev->client->ops->vf_capable ||
+ !(cdev->client->type == type)) {
+ dev_dbg(&pf->pdev->dev,
+ "Cannot locate client instance VF capability routine\n");
+ continue;
+ }
+ capable = cdev->client->ops->vf_capable(&cdev->lan_info,
+ cdev->client,
+ vf_id);
+ break;
+ }
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+ return capable;
+}
+
+/**
+ * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi
+ * @pf: board private structure
+ * @type: vsi type
+ * @start_vsi: a VSI pointer from where to start the search
+ *
+ * Returns non NULL on success or NULL for failure
+ **/
+struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
+ enum i40e_vsi_type type,
+ struct i40e_vsi *start_vsi)
+{
+ struct i40e_vsi *vsi;
+ int i = 0;
+
+ if (start_vsi) {
+ for (i = 0; i < pf->num_alloc_vsi; i++) {
+ vsi = pf->vsi[i];
+ if (vsi == start_vsi)
+ break;
+ }
+ }
+ for (; i < pf->num_alloc_vsi; i++) {
+ vsi = pf->vsi[i];
+ if (vsi && vsi->type == type)
+ return vsi;
+ }
+
+ return NULL;
+}
+
+/**
+ * i40e_client_add_instance - add a client instance struct to the instance list
+ * @pf: pointer to the board struct
+ * @client: pointer to a client struct in the client list.
+ *
+ * Returns cdev ptr on success, NULL on failure
+ **/
+static
+struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
+ struct i40e_client *client)
+{
+ struct i40e_client_instance *cdev;
+ struct netdev_hw_addr *mac = NULL;
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry(cdev, &i40e_client_instances, list) {
+ if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
+ cdev = NULL;
+ goto out;
+ }
+ }
+ cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+ if (!cdev)
+ goto out;
+
+ cdev->lan_info.pf = (void *)pf;
+ cdev->lan_info.netdev = vsi->netdev;
+ cdev->lan_info.pcidev = pf->pdev;
+ cdev->lan_info.fid = pf->hw.pf_id;
+ cdev->lan_info.ftype = I40E_CLIENT_FTYPE_PF;
+ cdev->lan_info.hw_addr = pf->hw.hw_addr;
+ cdev->lan_info.ops = &i40e_lan_ops;
+ cdev->lan_info.version.major = I40E_CLIENT_VERSION_MAJOR;
+ cdev->lan_info.version.minor = I40E_CLIENT_VERSION_MINOR;
+ cdev->lan_info.version.build = I40E_CLIENT_VERSION_BUILD;
+ cdev->lan_info.fw_maj_ver = pf->hw.aq.fw_maj_ver;
+ cdev->lan_info.fw_min_ver = pf->hw.aq.fw_min_ver;
+ cdev->lan_info.fw_build = pf->hw.aq.fw_build;
+ set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
+
+ if (i40e_client_get_params(vsi, &cdev->lan_info.params)) {
+ kfree(cdev);
+ cdev = NULL;
+ goto out;
+ }
+
+ cdev->lan_info.msix_count = pf->num_iwarp_msix;
+ cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+
+ mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
+ struct netdev_hw_addr, list);
+ if (mac)
+ ether_addr_copy(cdev->lan_info.lanmac, mac->addr);
+ else
+ dev_err(&pf->pdev->dev, "MAC address list is empty!\n");
+
+ cdev->client = client;
+ INIT_LIST_HEAD(&cdev->list);
+ list_add(&cdev->list, &i40e_client_instances);
+out:
+ mutex_unlock(&i40e_client_instance_mutex);
+ return cdev;
+}
+
+/**
+ * i40e_client_del_instance - removes a client instance from the list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+static
+int i40e_client_del_instance(struct i40e_pf *pf, struct i40e_client *client)
+{
+ struct i40e_client_instance *cdev, *tmp;
+ int ret = -ENODEV;
+
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
+ if ((cdev->lan_info.pf != pf) || (cdev->client != client))
+ continue;
+
+ dev_info(&pf->pdev->dev, "Deleted instance of Client %s, of dev %d bus=0x%02x func=0x%02x)\n",
+ client->name, pf->hw.pf_id,
+ pf->hw.bus.device, pf->hw.bus.func);
+ list_del(&cdev->list);
+ kfree(cdev);
+ ret = 0;
+ break;
+ }
+ mutex_unlock(&i40e_client_instance_mutex);
+ return ret;
+}
+
+/**
+ * i40e_client_subtask - client maintenance work
+ * @pf: board private structure
+ **/
+void i40e_client_subtask(struct i40e_pf *pf)
+{
+ struct i40e_client_instance *cdev;
+ struct i40e_client *client;
+ int ret = 0;
+
+ if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
+ return;
+ pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+
+ /* If we're down or resetting, just bail */
+ if (test_bit(__I40E_DOWN, &pf->state) ||
+ test_bit(__I40E_CONFIG_BUSY, &pf->state))
+ return;
+
+ /* Check client state and instantiate client if client registered */
+ mutex_lock(&i40e_client_mutex);
+ list_for_each_entry(client, &i40e_clients, list) {
+ /* first check client is registered */
+ if (!test_bit(__I40E_CLIENT_REGISTERED, &client->state))
+ continue;
+
+ /* Do we also need the LAN VSI to be up, to create instance */
+ if (!(client->flags & I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE)) {
+ /* check if L2 VSI is up, if not we are not ready */
+ if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
+ continue;
+ }
+
+ /* Add the client instance to the instance list */
+ cdev = i40e_client_add_instance(pf, client);
+ if (!cdev)
+ continue;
+
+ /* Also up the ref_cnt of no. of instances of this client */
+ atomic_inc(&client->ref_cnt);
+ dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
+ client->name, pf->hw.pf_id,
+ pf->hw.bus.device, pf->hw.bus.func);
+
+ /* Send an Open request to the client */
+ atomic_inc(&cdev->ref_cnt);
+ if (client->ops && client->ops->open)
+ ret = client->ops->open(&cdev->lan_info, client);
+ atomic_dec(&cdev->ref_cnt);
+ if (!ret) {
+ set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+ } else {
+ /* remove client instance */
+ i40e_client_del_instance(pf, client);
+ atomic_dec(&client->ref_cnt);
+ continue;
+ }
+ }
+ mutex_unlock(&i40e_client_mutex);
+}
+
+/**
+ * i40e_lan_add_device - add a lan device struct to the list of lan devices
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or none 0 on error
+ **/
+int i40e_lan_add_device(struct i40e_pf *pf)
+{
+ struct i40e_device *ldev;
+ int ret = 0;
+
+ mutex_lock(&i40e_device_mutex);
+ list_for_each_entry(ldev, &i40e_devices, list) {
+ if (ldev->pf == pf) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ldev->pf = pf;
+ INIT_LIST_HEAD(&ldev->list);
+ list_add(&ldev->list, &i40e_devices);
+ dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n",
+ pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func);
+
+ /* Since in some cases register may have happened before a device gets
+ * added, we can schedule a subtask to go initiate the clients.
+ */
+ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ i40e_service_event_schedule(pf);
+
+out:
+ mutex_unlock(&i40e_device_mutex);
+ return ret;
+}
+
+/**
+ * i40e_lan_del_device - removes a lan device from the device list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_lan_del_device(struct i40e_pf *pf)
+{
+ struct i40e_device *ldev, *tmp;
+ int ret = -ENODEV;
+
+ mutex_lock(&i40e_device_mutex);
+ list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) {
+ if (ldev->pf == pf) {
+ dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n",
+ pf->hw.pf_id, pf->hw.bus.device,
+ pf->hw.bus.func);
+ list_del(&ldev->list);
+ kfree(ldev);
+ ret = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&i40e_device_mutex);
+ return ret;
+}
+
+/**
+ * i40e_client_release - release client specific resources
+ * @client: pointer to the registered client
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_release(struct i40e_client *client)
+{
+ struct i40e_client_instance *cdev, *tmp;
+ struct i40e_pf *pf = NULL;
+ int ret = 0;
+
+ LIST_HEAD(cdevs_tmp);
+
+ mutex_lock(&i40e_client_instance_mutex);
+ list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
+ if (strncmp(cdev->client->name, client->name,
+ I40E_CLIENT_STR_LENGTH))
+ continue;
+ if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+ if (atomic_read(&cdev->ref_cnt) > 0) {
+ ret = I40E_ERR_NOT_READY;
+ goto out;
+ }
+ pf = (struct i40e_pf *)cdev->lan_info.pf;
+ if (client->ops && client->ops->close)
+ client->ops->close(&cdev->lan_info, client,
+ false);
+ i40e_client_release_qvlist(&cdev->lan_info);
+ clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+
+ dev_warn(&pf->pdev->dev,
+ "Client %s instance for PF id %d closed\n",
+ client->name, pf->hw.pf_id);
+ }
+ /* delete the client instance from the list */
+ list_del(&cdev->list);
+ list_add(&cdev->list, &cdevs_tmp);
+ atomic_dec(&client->ref_cnt);
+ dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
+ client->name);
+ }
+out:
+ mutex_unlock(&i40e_client_instance_mutex);
+
+ /* free the client device and release its vsi */
+ list_for_each_entry_safe(cdev, tmp, &cdevs_tmp, list) {
+ kfree(cdev);
+ }
+ return ret;
+}
+
+/**
+ * i40e_client_prepare - prepare client specific resources
+ * @client: pointer to the registered client
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_prepare(struct i40e_client *client)
+{
+ struct i40e_device *ldev;
+ struct i40e_pf *pf;
+ int ret = 0;
+
+ mutex_lock(&i40e_device_mutex);
+ list_for_each_entry(ldev, &i40e_devices, list) {
+ pf = ldev->pf;
+ /* Start the client subtask */
+ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ i40e_service_event_schedule(pf);
+ }
+ mutex_unlock(&i40e_device_mutex);
+ return ret;
+}
+
+/**
+ * i40e_client_virtchnl_send - TBD
+ * @ldev: pointer to L2 context
+ * @client: Client pointer
+ * @vf_id: absolute VF identifier
+ * @msg: message buffer
+ * @len: length of message buffer
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 vf_id, u8 *msg, u16 len)
+{
+ struct i40e_pf *pf = ldev->pf;
+ struct i40e_hw *hw = &pf->hw;
+ i40e_status err;
+
+ err = i40e_aq_send_msg_to_vf(hw, vf_id, I40E_VIRTCHNL_OP_IWARP,
+ 0, msg, len, NULL);
+ if (err)
+ dev_err(&pf->pdev->dev, "Unable to send iWarp message to VF, error %d, aq status %d\n",
+ err, hw->aq.asq_last_status);
+
+ return err;
+}
+
+/**
+ * i40e_client_setup_qvlist
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qv_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+ struct i40e_client *client,
+ struct i40e_qvlist_info *qvlist_info)
+{
+ struct i40e_pf *pf = ldev->pf;
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_qv_info *qv_info;
+ u32 v_idx, i, reg_idx, reg;
+ u32 size;
+
+ size = sizeof(struct i40e_qvlist_info) +
+ (sizeof(struct i40e_qv_info) * (qvlist_info->num_vectors - 1));
+ ldev->qvlist_info = kzalloc(size, GFP_KERNEL);
+ ldev->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+ for (i = 0; i < qvlist_info->num_vectors; i++) {
+ qv_info = &qvlist_info->qv_info[i];
+ if (!qv_info)
+ continue;
+ v_idx = qv_info->v_idx;
+
+ /* Validate vector id belongs to this client */
+ if ((v_idx >= (pf->iwarp_base_vector + pf->num_iwarp_msix)) ||
+ (v_idx < pf->iwarp_base_vector))
+ goto err;
+
+ ldev->qvlist_info->qv_info[i] = *qv_info;
+ reg_idx = I40E_PFINT_LNKLSTN(v_idx - 1);
+
+ if (qv_info->ceq_idx == I40E_QUEUE_INVALID_IDX) {
+ /* Special case - No CEQ mapped on this vector */
+ wr32(hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+ } else {
+ reg = (qv_info->ceq_idx &
+ I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+ (I40E_QUEUE_TYPE_PE_CEQ <<
+ I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ wr32(hw, reg_idx, reg);
+
+ reg = (I40E_PFINT_CEQCTL_CAUSE_ENA_MASK |
+ (v_idx << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT) |
+ (qv_info->itr_idx <<
+ I40E_PFINT_CEQCTL_ITR_INDX_SHIFT) |
+ (I40E_QUEUE_END_OF_LIST <<
+ I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT));
+ wr32(hw, I40E_PFINT_CEQCTL(qv_info->ceq_idx), reg);
+ }
+ if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+ reg = (I40E_PFINT_AEQCTL_CAUSE_ENA_MASK |
+ (v_idx << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT) |
+ (qv_info->itr_idx <<
+ I40E_PFINT_AEQCTL_ITR_INDX_SHIFT));
+
+ wr32(hw, I40E_PFINT_AEQCTL, reg);
+ }
+ }
+
+ return 0;
+err:
+ kfree(ldev->qvlist_info);
+ ldev->qvlist_info = NULL;
+ return -EINVAL;
+}
+
+/**
+ * i40e_client_request_reset
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @level: reset level
+ **/
+static void i40e_client_request_reset(struct i40e_info *ldev,
+ struct i40e_client *client,
+ u32 reset_level)
+{
+ struct i40e_pf *pf = ldev->pf;
+
+ switch (reset_level) {
+ case I40E_CLIENT_RESET_LEVEL_PF:
+ set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+ break;
+ case I40E_CLIENT_RESET_LEVEL_CORE:
+ set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+ break;
+ default:
+ dev_warn(&pf->pdev->dev,
+ "Client %s instance for PF id %d request an unsupported reset: %d.\n",
+ client->name, pf->hw.pf_id, reset_level);
+ break;
+ }
+
+ i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_client_update_vsi_ctxt
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @is_vf: if this for the VF
+ * @vf_id: if is_vf true this carries the vf_id
+ * @flag: Any device level setting that needs to be done for PE
+ * @valid_flag: Bits in this match up and enable changing of flag bits
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+ struct i40e_client *client,
+ bool is_vf, u32 vf_id,
+ u32 flag, u32 valid_flag)
+{
+ struct i40e_pf *pf = ldev->pf;
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ struct i40e_vsi_context ctxt;
+ bool update = true;
+ i40e_status err;
+
+ /* TODO: for now do not allow setting VF's VSI setting */
+ if (is_vf)
+ return -EINVAL;
+
+ ctxt.seid = pf->main_vsi_seid;
+ ctxt.pf_num = pf->hw.pf_id;
+ err = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+ ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+ if (err) {
+ dev_info(&pf->pdev->dev,
+ "couldn't get PF vsi config, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENOENT;
+ }
+
+ if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
+ (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+ ctxt.info.valid_sections =
+ cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+ ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+ } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
+ !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+ ctxt.info.valid_sections =
+ cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+ ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+ } else {
+ update = false;
+ dev_warn(&pf->pdev->dev,
+ "Client %s instance for PF id %d request an unsupported Config: %x.\n",
+ client->name, pf->hw.pf_id, flag);
+ }
+
+ if (update) {
+ err = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (err) {
+ dev_info(&pf->pdev->dev,
+ "update VSI ctxt for PE failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ }
+ }
+ return err;
+}
+
+/**
+ * i40e_register_client - Register a i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_register_client(struct i40e_client *client)
+{
+ int ret = 0;
+ enum i40e_vsi_type vsi_type;
+
+ if (!client) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (strlen(client->name) == 0) {
+ pr_info("i40e: Failed to register client with no name\n");
+ ret = -EIO;
+ goto out;
+ }
+
+ mutex_lock(&i40e_client_mutex);
+ if (i40e_client_is_registered(client)) {
+ pr_info("i40e: Client %s has already been registered!\n",
+ client->name);
+ mutex_unlock(&i40e_client_mutex);
+ ret = -EEXIST;
+ goto out;
+ }
+
+ if ((client->version.major != I40E_CLIENT_VERSION_MAJOR) ||
+ (client->version.minor != I40E_CLIENT_VERSION_MINOR)) {
+ pr_info("i40e: Failed to register client %s due to mismatched client interface version\n",
+ client->name);
+ pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+ client->version.major, client->version.minor,
+ client->version.build,
+ i40e_client_interface_version_str);
+ mutex_unlock(&i40e_client_mutex);
+ ret = -EIO;
+ goto out;
+ }
+
+ vsi_type = i40e_client_type_to_vsi_type(client->type);
+ if (vsi_type == I40E_VSI_TYPE_UNKNOWN) {
+ pr_info("i40e: Failed to register client %s due to unknown client type %d\n",
+ client->name, client->type);
+ mutex_unlock(&i40e_client_mutex);
+ ret = -EIO;
+ goto out;
+ }
+ list_add(&client->list, &i40e_clients);
+ set_bit(__I40E_CLIENT_REGISTERED, &client->state);
+ mutex_unlock(&i40e_client_mutex);
+
+ if (i40e_client_prepare(client)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ pr_info("i40e: Registered client %s with return code %d\n",
+ client->name, ret);
+out:
+ return ret;
+}
+EXPORT_SYMBOL(i40e_register_client);
+
+/**
+ * i40e_unregister_client - Unregister a i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_unregister_client(struct i40e_client *client)
+{
+ int ret = 0;
+
+ /* When a unregister request comes through we would have to send
+ * a close for each of the client instances that were opened.
+ * client_release function is called to handle this.
+ */
+ if (!client || i40e_client_release(client)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /* TODO: check if device is in reset, or if that matters? */
+ mutex_lock(&i40e_client_mutex);
+ if (!i40e_client_is_registered(client)) {
+ pr_info("i40e: Client %s has not been registered\n",
+ client->name);
+ mutex_unlock(&i40e_client_mutex);
+ ret = -ENODEV;
+ goto out;
+ }
+ if (atomic_read(&client->ref_cnt) == 0) {
+ clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
+ list_del(&client->list);
+ pr_info("i40e: Unregistered client %s with return code %d\n",
+ client->name, ret);
+ } else {
+ ret = I40E_ERR_NOT_READY;
+ pr_err("i40e: Client %s failed unregister - client has open instances\n",
+ client->name);
+ }
+
+ mutex_unlock(&i40e_client_mutex);
+out:
+ return ret;
+}
+EXPORT_SYMBOL(i40e_unregister_client);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
new file mode 100644
index 000000000000..bf6b453d93a1
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -0,0 +1,232 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_CLIENT_H_
+#define _I40E_CLIENT_H_
+
+#define I40E_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define I40E_CLIENT_VERSION_MAJOR 0
+#define I40E_CLIENT_VERSION_MINOR 01
+#define I40E_CLIENT_VERSION_BUILD 00
+#define I40E_CLIENT_VERSION_STR \
+ XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \
+ XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \
+ XSTRINGIFY(I40E_CLIENT_VERSION_BUILD)
+
+struct i40e_client_version {
+ u8 major;
+ u8 minor;
+ u8 build;
+ u8 rsvd;
+};
+
+enum i40e_client_state {
+ __I40E_CLIENT_NULL,
+ __I40E_CLIENT_REGISTERED
+};
+
+enum i40e_client_instance_state {
+ __I40E_CLIENT_INSTANCE_NONE,
+ __I40E_CLIENT_INSTANCE_OPENED,
+};
+
+enum i40e_client_type {
+ I40E_CLIENT_IWARP,
+ I40E_CLIENT_VMDQ2
+};
+
+struct i40e_ops;
+struct i40e_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ 0x80
+#define I40E_QUEUE_INVALID_IDX 0xFFFF
+
+struct i40e_qv_info {
+ u32 v_idx; /* msix_vector */
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+struct i40e_qvlist_info {
+ u32 num_vectors;
+ struct i40e_qv_info qv_info[1];
+};
+
+#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per priority info */
+struct i40e_prio_qos_params {
+ u16 qs_handle; /* qs handle for prio */
+ u8 tc; /* TC mapped to prio */
+ u8 reserved;
+};
+
+#define I40E_CLIENT_MAX_USER_PRIORITY 8
+/* Struct to hold Client QoS */
+struct i40e_qos_params {
+ struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct i40e_params {
+ struct i40e_qos_params qos;
+ u16 mtu;
+};
+
+/* Structure to hold Lan device info for a client device */
+struct i40e_info {
+ struct i40e_client_version version;
+ u8 lanmac[6];
+ struct net_device *netdev;
+ struct pci_dev *pcidev;
+ u8 __iomem *hw_addr;
+ u8 fid; /* function id, PF id or VF id */
+#define I40E_CLIENT_FTYPE_PF 0
+#define I40E_CLIENT_FTYPE_VF 1
+ u8 ftype; /* function type, PF or VF */
+ void *pf;
+
+ /* All L2 params that could change during the life span of the PF
+ * and needs to be communicated to the client when they change
+ */
+ struct i40e_qvlist_info *qvlist_info;
+ struct i40e_params params;
+ struct i40e_ops *ops;
+
+ u16 msix_count; /* number of msix vectors*/
+ /* Array down below will be dynamically allocated based on msix_count */
+ struct msix_entry *msix_entries;
+ u16 itr_index; /* Which ITR index the PE driver is suppose to use */
+ u16 fw_maj_ver; /* firmware major version */
+ u16 fw_min_ver; /* firmware minor version */
+ u32 fw_build; /* firmware build number */
+};
+
+#define I40E_CLIENT_RESET_LEVEL_PF 1
+#define I40E_CLIENT_RESET_LEVEL_CORE 2
+#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE BIT(1)
+
+struct i40e_ops {
+ /* setup_q_vector_list enables queues with a particular vector */
+ int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client,
+ struct i40e_qvlist_info *qv_info);
+
+ int (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client,
+ u32 vf_id, u8 *msg, u16 len);
+
+ /* If the PE Engine is unresponsive, RDMA driver can request a reset.
+ * The level helps determine the level of reset being requested.
+ */
+ void (*request_reset)(struct i40e_info *ldev,
+ struct i40e_client *client, u32 level);
+
+ /* API for the RDMA driver to set certain VSI flags that control
+ * PE Engine.
+ */
+ int (*update_vsi_ctxt)(struct i40e_info *ldev,
+ struct i40e_client *client,
+ bool is_vf, u32 vf_id,
+ u32 flag, u32 valid_flag);
+};
+
+struct i40e_client_ops {
+ /* Should be called from register_client() or whenever PF is ready
+ * to create a specific client instance.
+ */
+ int (*open)(struct i40e_info *ldev, struct i40e_client *client);
+
+ /* Should be called when netdev is unavailable or when unregister
+ * call comes in. If the close is happenening due to a reset being
+ * triggered set the reset bit to true.
+ */
+ void (*close)(struct i40e_info *ldev, struct i40e_client *client,
+ bool reset);
+
+ /* called when some l2 managed parameters changes - mtu */
+ void (*l2_param_change)(struct i40e_info *ldev,
+ struct i40e_client *client,
+ struct i40e_params *params);
+
+ int (*virtchnl_receive)(struct i40e_info *ldev,
+ struct i40e_client *client, u32 vf_id,
+ u8 *msg, u16 len);
+
+ /* called when a VF is reset by the PF */
+ void (*vf_reset)(struct i40e_info *ldev,
+ struct i40e_client *client, u32 vf_id);
+
+ /* called when the number of VFs changes */
+ void (*vf_enable)(struct i40e_info *ldev,
+ struct i40e_client *client, u32 num_vfs);
+
+ /* returns true if VF is capable of specified offload */
+ int (*vf_capable)(struct i40e_info *ldev,
+ struct i40e_client *client, u32 vf_id);
+};
+
+/* Client device */
+struct i40e_client_instance {
+ struct list_head list;
+ struct i40e_info lan_info;
+ struct i40e_client *client;
+ unsigned long state;
+ /* A count of all the in-progress calls to the client */
+ atomic_t ref_cnt;
+};
+
+struct i40e_client {
+ struct list_head list; /* list of registered clients */
+ char name[I40E_CLIENT_STR_LENGTH];
+ struct i40e_client_version version;
+ unsigned long state; /* client state */
+ atomic_t ref_cnt; /* Count of all the client devices of this kind */
+ u32 flags;
+#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0)
+#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2)
+ enum i40e_client_type type;
+ struct i40e_client_ops *ops; /* client ops provided by the client */
+};
+
+static inline bool i40e_client_is_registered(struct i40e_client *client)
+{
+ return test_bit(__I40E_CLIENT_REGISTERED, &client->state);
+}
+
+/* used by clients */
+int i40e_register_client(struct i40e_client *client);
+int i40e_unregister_client(struct i40e_client *client);
+
+#endif /* _I40E_CLIENT_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 70d9605a0d9e..67006431726a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -289,7 +289,7 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
*
* If not already scheduled, this puts the task into the work queue
**/
-static void i40e_service_event_schedule(struct i40e_pf *pf)
+void i40e_service_event_schedule(struct i40e_pf *pf)
{
if (!test_bit(__I40E_DOWN, &pf->state) &&
!test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) &&
@@ -2230,7 +2230,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
netdev->mtu = new_mtu;
if (netif_running(netdev))
i40e_vsi_reinit_locked(vsi);
-
+ i40e_notify_client_of_l2_param_changes(vsi);
return 0;
}
@@ -4169,6 +4169,9 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
free_irq(pf->msix_entries[0].vector, pf);
}
+ i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
+ I40E_IWARP_IRQ_PILE_ID);
+
i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
for (i = 0; i < pf->num_alloc_vsi; i++)
if (pf->vsi[i])
@@ -4212,12 +4215,17 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
**/
static void i40e_vsi_close(struct i40e_vsi *vsi)
{
+ bool reset = false;
+
if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
i40e_down(vsi);
i40e_vsi_free_irq(vsi);
i40e_vsi_free_tx_resources(vsi);
i40e_vsi_free_rx_resources(vsi);
vsi->current_netdev_flags = 0;
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
+ reset = true;
+ i40e_notify_client_of_netdev_close(vsi, reset);
}
/**
@@ -4850,6 +4858,12 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
ctxt.info = vsi->info;
i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+ if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+ ctxt.info.valid_sections |=
+ cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+ ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+ }
+
/* Update the VSI after updating the VSI queue-mapping information */
ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
if (ret) {
@@ -4993,6 +5007,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
if (pf->vsi[v]->netdev)
i40e_dcbnl_set_all(pf->vsi[v]);
}
+ i40e_notify_client_of_l2_param_changes(pf->vsi[v]);
}
}
@@ -5191,6 +5206,11 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
}
i40e_fdir_filter_restore(vsi);
}
+
+ /* On the next run of the service_task, notify any clients of the new
+ * opened netdev
+ */
+ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
i40e_service_event_schedule(pf);
return 0;
@@ -5379,6 +5399,8 @@ int i40e_open(struct net_device *netdev)
geneve_get_rx_port(netdev);
#endif
+ i40e_notify_client_of_netdev_open(vsi);
+
return 0;
}
@@ -6043,6 +6065,7 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
case I40E_VSI_SRIOV:
case I40E_VSI_VMDQ2:
case I40E_VSI_CTRL:
+ case I40E_VSI_IWARP:
case I40E_VSI_MIRROR:
default:
/* there is no notification for other VSIs */
@@ -7148,6 +7171,7 @@ static void i40e_service_task(struct work_struct *work)
i40e_vc_process_vflr_event(pf);
i40e_watchdog_subtask(pf);
i40e_fdir_reinit_subtask(pf);
+ i40e_client_subtask(pf);
i40e_sync_filters_subtask(pf);
i40e_sync_udp_filters_subtask(pf);
i40e_clean_adminq_subtask(pf);
@@ -7550,6 +7574,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
int vectors_left;
int v_budget, i;
int v_actual;
+ int iwarp_requested = 0;
if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
return -ENODEV;
@@ -7563,6 +7588,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
* is governed by number of cpus in the system.
* - assumes symmetric Tx/Rx pairing
* - The number of VMDq pairs
+ * - The CPU count within the NUMA node if iWARP is enabled
#ifdef I40E_FCOE
* - The number of FCOE qps.
#endif
@@ -7609,6 +7635,16 @@ static int i40e_init_msix(struct i40e_pf *pf)
}
#endif
+ /* can we reserve enough for iWARP? */
+ if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (!vectors_left)
+ pf->num_iwarp_msix = 0;
+ else if (vectors_left < pf->num_iwarp_msix)
+ pf->num_iwarp_msix = 1;
+ v_budget += pf->num_iwarp_msix;
+ vectors_left -= pf->num_iwarp_msix;
+ }
+
/* any vectors left over go for VMDq support */
if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps;
@@ -7643,6 +7679,8 @@ static int i40e_init_msix(struct i40e_pf *pf)
* of these features based on the policy and at the end disable
* the features that did not get any vectors.
*/
+ iwarp_requested = pf->num_iwarp_msix;
+ pf->num_iwarp_msix = 0;
#ifdef I40E_FCOE
pf->num_fcoe_qps = 0;
pf->num_fcoe_msix = 0;
@@ -7681,17 +7719,33 @@ static int i40e_init_msix(struct i40e_pf *pf)
pf->num_lan_msix = 1;
break;
case 3:
+ if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ pf->num_lan_msix = 1;
+ pf->num_iwarp_msix = 1;
+ } else {
+ pf->num_lan_msix = 2;
+ }
#ifdef I40E_FCOE
/* give one vector to FCoE */
if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
pf->num_lan_msix = 1;
pf->num_fcoe_msix = 1;
}
-#else
- pf->num_lan_msix = 2;
#endif
break;
default:
+ if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ pf->num_iwarp_msix = min_t(int, (vec / 3),
+ iwarp_requested);
+ pf->num_vmdq_vsis = min_t(int, (vec / 3),
+ I40E_DEFAULT_NUM_VMDQ_VSI);
+ } else {
+ pf->num_vmdq_vsis = min_t(int, (vec / 2),
+ I40E_DEFAULT_NUM_VMDQ_VSI);
+ }
+ pf->num_lan_msix = min_t(int,
+ (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
+ pf->num_lan_msix);
#ifdef I40E_FCOE
/* give one vector to FCoE */
if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
@@ -7699,8 +7753,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
vec--;
}
#endif
- /* give the rest to the PF */
- pf->num_lan_msix = min_t(int, vec, pf->num_lan_qps);
break;
}
}
@@ -7710,6 +7762,12 @@ static int i40e_init_msix(struct i40e_pf *pf)
dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
}
+
+ if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+ (pf->num_iwarp_msix == 0)) {
+ dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
+ pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+ }
#ifdef I40E_FCOE
if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) {
@@ -7801,6 +7859,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
vectors = i40e_init_msix(pf);
if (vectors < 0) {
pf->flags &= ~(I40E_FLAG_MSIX_ENABLED |
+ I40E_FLAG_IWARP_ENABLED |
#ifdef I40E_FCOE
I40E_FLAG_FCOE_ENABLED |
#endif
@@ -8474,6 +8533,12 @@ static int i40e_sw_init(struct i40e_pf *pf)
pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
}
+ if (pf->hw.func_caps.iwarp) {
+ pf->flags |= I40E_FLAG_IWARP_ENABLED;
+ /* IWARP needs one extra vector for CQP just like MISC.*/
+ pf->num_iwarp_msix = (int)num_online_cpus() + 1;
+ }
+
#ifdef I40E_FCOE
i40e_init_pf_fcoe(pf);
@@ -9328,6 +9393,13 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
}
+ if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+ ctxt.info.valid_sections |=
+ cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+ ctxt.info.queueing_opt_flags |=
+ I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+ }
+
ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
if (pf->vf[vsi->vf_id].spoofchk) {
@@ -9351,6 +9423,10 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
break;
#endif /* I40E_FCOE */
+ case I40E_VSI_IWARP:
+ /* send down message to iWARP */
+ break;
+
default:
return -ENODEV;
}
@@ -10467,6 +10543,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
/* make sure all the fancies are disabled */
pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
+ I40E_FLAG_IWARP_ENABLED |
#ifdef I40E_FCOE
I40E_FLAG_FCOE_ENABLED |
#endif
@@ -10484,6 +10561,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
queues_left -= pf->num_lan_qps;
pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
+ I40E_FLAG_IWARP_ENABLED |
#ifdef I40E_FCOE
I40E_FLAG_FCOE_ENABLED |
#endif
@@ -11059,7 +11137,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
#endif /* CONFIG_PCI_IOV */
- pfs_found++;
+ if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
+ pf->num_iwarp_msix,
+ I40E_IWARP_IRQ_PILE_ID);
+ if (pf->iwarp_base_vector < 0) {
+ dev_info(&pdev->dev,
+ "failed to get tracking for %d vectors for IWARP err=%d\n",
+ pf->num_iwarp_msix, pf->iwarp_base_vector);
+ pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+ }
+ }
i40e_dbg_pf_init(pf);
@@ -11070,6 +11158,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
mod_timer(&pf->service_timer,
round_jiffies(jiffies + pf->service_timer_period));
+ /* add this PF to client device list and launch a client service task */
+ err = i40e_lan_add_device(pf);
+ if (err)
+ dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
+ err);
+
#ifdef I40E_FCOE
/* create FCoE interface */
i40e_fcoe_vsi_setup(pf);
@@ -11245,6 +11339,13 @@ static void i40e_remove(struct pci_dev *pdev)
if (pf->vsi[pf->lan_vsi])
i40e_vsi_release(pf->vsi[pf->lan_vsi]);
+ /* remove attached clients */
+ ret_code = i40e_lan_del_device(pf);
+ if (ret_code) {
+ dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+ ret_code);
+ }
+
/* shutdown and destroy the HMC */
if (hw->hmc.hmc_obj) {
ret_code = i40e_shutdown_lan_hmc(hw);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 0a0baf71041b..3335f9d13374 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -78,7 +78,7 @@ enum i40e_debug_mask {
I40E_DEBUG_DCB = 0x00000400,
I40E_DEBUG_DIAG = 0x00000800,
I40E_DEBUG_FD = 0x00001000,
-
+ I40E_DEBUG_IWARP = 0x00F00000,
I40E_DEBUG_AQ_MESSAGE = 0x01000000,
I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000,
I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000,
@@ -160,6 +160,7 @@ enum i40e_vsi_type {
I40E_VSI_MIRROR = 5,
I40E_VSI_SRIOV = 6,
I40E_VSI_FDIR = 7,
+ I40E_VSI_IWARP = 8,
I40E_VSI_TYPE_UNKNOWN
};
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index 3226946bf3d4..ab866cf3dc18 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
@@ -81,6 +81,9 @@ enum i40e_virtchnl_ops {
I40E_VIRTCHNL_OP_GET_STATS = 15,
I40E_VIRTCHNL_OP_FCOE = 16,
I40E_VIRTCHNL_OP_EVENT = 17,
+ I40E_VIRTCHNL_OP_IWARP = 20,
+ I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
+ I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
};
/* Virtual channel message descriptor. This overlays the admin queue
@@ -348,6 +351,37 @@ struct i40e_virtchnl_pf_event {
int severity;
};
+/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
+ * VF uses this message to request PF to map IWARP vectors to IWARP queues.
+ * The request for this originates from the VF IWARP driver through
+ * a client interface between VF LAN and VF IWARP driver.
+ * A vector could have an AEQ and CEQ attached to it although
+ * there is a single AEQ per VF IWARP instance in which case
+ * most vectors will have an INVALID_IDX for aeq and valid idx for ceq.
+ * There will never be a case where there will be multiple CEQs attached
+ * to a single vector.
+ * PF configures interrupt mapping and returns status.
+ */
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+*/
+#define I40E_QUEUE_TYPE_PE_AEQ 0x80
+#define I40E_QUEUE_INVALID_IDX 0xFFFF
+
+struct i40e_virtchnl_iwarp_qv_info {
+ u32 v_idx; /* msix_vector */
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+struct i40e_virtchnl_iwarp_qvlist_info {
+ u32 num_vectors;
+ struct i40e_virtchnl_iwarp_qv_info qv_info[1];
+};
+
/* VF reset states - these are written into the RSTAT register:
* I40E_VFGEN_RSTAT1 on the PF
* I40E_VFGEN_RSTAT on the VF
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index acd2693a4e97..816c6bbf7093 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -352,6 +352,136 @@ irq_list_done:
}
/**
+ * i40e_release_iwarp_qvlist
+ * @vf: pointer to the VF.
+ *
+ **/
+static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info;
+ u32 msix_vf;
+ u32 i;
+
+ if (!vf->qvlist_info)
+ return;
+
+ msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+ for (i = 0; i < qvlist_info->num_vectors; i++) {
+ struct i40e_virtchnl_iwarp_qv_info *qv_info;
+ u32 next_q_index, next_q_type;
+ struct i40e_hw *hw = &pf->hw;
+ u32 v_idx, reg_idx, reg;
+
+ qv_info = &qvlist_info->qv_info[i];
+ if (!qv_info)
+ continue;
+ v_idx = qv_info->v_idx;
+ if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+ /* Figure out the queue after CEQ and make that the
+ * first queue.
+ */
+ reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+ reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx));
+ next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK)
+ >> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT;
+ next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK)
+ >> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT;
+
+ reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+ reg = (next_q_index &
+ I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+ (next_q_type <<
+ I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+ wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+ }
+ }
+ kfree(vf->qvlist_info);
+ vf->qvlist_info = NULL;
+}
+
+/**
+ * i40e_config_iwarp_qvlist
+ * @vf: pointer to the VF info
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
+ struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_virtchnl_iwarp_qv_info *qv_info;
+ u32 v_idx, i, reg_idx, reg;
+ u32 next_q_idx, next_q_type;
+ u32 msix_vf, size;
+
+ size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
+ (sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+ (qvlist_info->num_vectors - 1));
+ vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+ vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+ msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+ for (i = 0; i < qvlist_info->num_vectors; i++) {
+ qv_info = &qvlist_info->qv_info[i];
+ if (!qv_info)
+ continue;
+ v_idx = qv_info->v_idx;
+
+ /* Validate vector id belongs to this vf */
+ if (!i40e_vc_isvalid_vector_id(vf, v_idx))
+ goto err;
+
+ vf->qvlist_info->qv_info[i] = *qv_info;
+
+ reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+ /* We might be sharing the interrupt, so get the first queue
+ * index and type, push it down the list by adding the new
+ * queue on top. Also link it with the new queue in CEQCTL.
+ */
+ reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx));
+ next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >>
+ I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT);
+ next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >>
+ I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+ if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+ reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+ reg = (I40E_VPINT_CEQCTL_CAUSE_ENA_MASK |
+ (v_idx << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT) |
+ (qv_info->itr_idx << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT) |
+ (next_q_type << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT) |
+ (next_q_idx << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT));
+ wr32(hw, I40E_VPINT_CEQCTL(reg_idx), reg);
+
+ reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+ reg = (qv_info->ceq_idx &
+ I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+ (I40E_QUEUE_TYPE_PE_CEQ <<
+ I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+ }
+
+ if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+ reg = (I40E_VPINT_AEQCTL_CAUSE_ENA_MASK |
+ (v_idx << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT) |
+ (qv_info->itr_idx << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT));
+
+ wr32(hw, I40E_VPINT_AEQCTL(vf->vf_id), reg);
+ }
+ }
+
+ return 0;
+err:
+ kfree(vf->qvlist_info);
+ vf->qvlist_info = NULL;
+ return -EINVAL;
+}
+
+/**
* i40e_config_vsi_tx_queue
* @vf: pointer to the VF info
* @vsi_id: id of VSI as provided by the FW
@@ -850,9 +980,11 @@ complete_reset:
/* reallocate VF resources to reset the VSI state */
i40e_free_vf_res(vf);
if (!i40e_alloc_vf_res(vf)) {
+ int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
i40e_enable_vf_mappings(vf);
set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
+ i40e_notify_client_of_vf_reset(pf, abs_vf_id);
}
/* tell the VF the reset is done */
wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
@@ -877,11 +1009,7 @@ void i40e_free_vfs(struct i40e_pf *pf)
while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state))
usleep_range(1000, 2000);
- for (i = 0; i < pf->num_alloc_vfs; i++)
- if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
- i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
- false);
-
+ i40e_notify_client_of_vf_enable(pf, 0);
for (i = 0; i < pf->num_alloc_vfs; i++)
if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
@@ -953,6 +1081,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
goto err_iov;
}
}
+ i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
/* allocate memory */
vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
if (!vfs) {
@@ -1206,6 +1335,13 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi->info.pvid)
vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+
+ if (i40e_vf_client_capable(pf, vf->vf_id, I40E_CLIENT_IWARP) &&
+ (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) {
+ vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP;
+ set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states);
+ }
+
if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
vfres->vf_offload_flags |=
@@ -1827,6 +1963,72 @@ error_param:
}
/**
+ * i40e_vc_iwarp_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+ struct i40e_pf *pf = vf->pf;
+ int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+ !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+
+ i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id,
+ msg, msglen);
+
+error_param:
+ /* send the response to the VF */
+ return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_IWARP,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_iwarp_qvmap_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @config: config qvmap or release it
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
+ bool config)
+{
+ struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info =
+ (struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+ !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (config) {
+ if (i40e_config_iwarp_qvlist(vf, qvlist_info))
+ aq_ret = I40E_ERR_PARAM;
+ } else {
+ i40e_release_iwarp_qvlist(vf);
+ }
+
+error_param:
+ /* send the response to the VF */
+ return i40e_vc_send_resp_to_vf(vf,
+ config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP :
+ I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+ aq_ret);
+}
+
+/**
* i40e_vc_validate_vf_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -1921,6 +2123,32 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
case I40E_VIRTCHNL_OP_GET_STATS:
valid_len = sizeof(struct i40e_virtchnl_queue_select);
break;
+ case I40E_VIRTCHNL_OP_IWARP:
+ /* These messages are opaque to us and will be validated in
+ * the RDMA client code. We just need to check for nonzero
+ * length. The firmware will enforce max length restrictions.
+ */
+ if (msglen)
+ valid_len = msglen;
+ else
+ err_msg_format = true;
+ break;
+ case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+ valid_len = 0;
+ break;
+ case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+ valid_len = sizeof(struct i40e_virtchnl_iwarp_qvlist_info);
+ if (msglen >= valid_len) {
+ struct i40e_virtchnl_iwarp_qvlist_info *qv =
+ (struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+ if (qv->num_vectors == 0) {
+ err_msg_format = true;
+ break;
+ }
+ valid_len += ((qv->num_vectors - 1) *
+ sizeof(struct i40e_virtchnl_iwarp_qv_info));
+ }
+ break;
/* These are always errors coming from the VF. */
case I40E_VIRTCHNL_OP_EVENT:
case I40E_VIRTCHNL_OP_UNKNOWN:
@@ -2010,6 +2238,15 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode,
case I40E_VIRTCHNL_OP_GET_STATS:
ret = i40e_vc_get_stats_msg(vf, msg, msglen);
break;
+ case I40E_VIRTCHNL_OP_IWARP:
+ ret = i40e_vc_iwarp_msg(vf, msg, msglen);
+ break;
+ case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+ ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true);
+ break;
+ case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+ ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false);
+ break;
case I40E_VIRTCHNL_OP_UNKNOWN:
default:
dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index e74642a0c42e..e7b2fba0309e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -58,6 +58,7 @@ enum i40e_queue_ctrl {
enum i40e_vf_states {
I40E_VF_STAT_INIT = 0,
I40E_VF_STAT_ACTIVE,
+ I40E_VF_STAT_IWARPENA,
I40E_VF_STAT_FCOEENA,
I40E_VF_STAT_DISABLED,
};
@@ -66,6 +67,7 @@ enum i40e_vf_states {
enum i40e_vf_capabilities {
I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
I40E_VIRTCHNL_VF_CAP_L2,
+ I40E_VIRTCHNL_VF_CAP_IWARP,
};
/* VF information structure */
@@ -106,6 +108,8 @@ struct i40e_vf {
bool link_forced;
bool link_up; /* only valid if VF link is forced */
bool spoofchk;
+ /* RDMA Client */
+ struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info;
};
void i40e_free_vfs(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 97f5114fc113..eb926e1ee71c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -407,6 +407,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
const char *mlx5_command_str(int command)
{
switch (command) {
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ return "QUERY_HCA_VPORT_CONTEXT";
+
+ case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+ return "MODIFY_HCA_VPORT_CONTEXT";
+
case MLX5_CMD_OP_QUERY_HCA_CAP:
return "QUERY_HCA_CAP";
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index aa1ab4702385..75c7ae6a5cc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -98,88 +98,55 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
{
int err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
-
- err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
if (err)
return err;
if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS,
- HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS,
- HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
if (err)
return err;
}
if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ODP,
- HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_ODP,
- HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
if (err)
return err;
}
if (MLX5_CAP_GEN(dev, atomic)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
- HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
- HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
if (err)
return err;
}
if (MLX5_CAP_GEN(dev, roce)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE,
- HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE,
- HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
if (err)
return err;
}
if (MLX5_CAP_GEN(dev, nic_flow_table)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE,
- HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE,
- HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
if (err)
return err;
}
if (MLX5_CAP_GEN(dev, vport_group_manager) &&
MLX5_CAP_GEN(dev, eswitch_flow_table)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
- HCA_CAP_OPMOD_GET_CUR);
- if (err)
- return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
- HCA_CAP_OPMOD_GET_MAX);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
if (err)
return err;
}
if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
- HCA_CAP_OPMOD_GET_CUR);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
if (err)
return err;
- err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
- HCA_CAP_OPMOD_GET_MAX);
+ }
+
+ if (MLX5_CAP_GEN(dev, vector_calc)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 72a94e72ee25..3f3b2fae4991 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -341,8 +341,9 @@ static u16 to_fw_pkey_sz(u32 size)
}
}
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
- enum mlx5_cap_mode cap_mode)
+static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+ enum mlx5_cap_type cap_type,
+ enum mlx5_cap_mode cap_mode)
{
u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
@@ -392,6 +393,16 @@ query_ex:
return err;
}
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+{
+ int ret;
+
+ ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+ if (ret)
+ return ret;
+ return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
{
u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
@@ -419,8 +430,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
int err;
if (MLX5_CAP_GEN(dev, atomic)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
- HCA_CAP_OPMOD_GET_CUR);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
if (err)
return err;
} else {
@@ -462,11 +472,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
if (!set_ctx)
goto query_ex;
- err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
- if (err)
- goto query_ex;
-
- err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
if (err)
goto query_ex;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 90ab09e375b8..bd518405859e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -852,7 +852,8 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
- u8 port_num, void *out, size_t out_sz)
+ int vf, u8 port_num, void *out,
+ size_t out_sz)
{
int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
int is_group_manager;
@@ -871,7 +872,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
if (other_vport) {
if (is_group_manager) {
MLX5_SET(query_vport_counter_in, in, other_vport, 1);
- MLX5_SET(query_vport_counter_in, in, vport_number, 0);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1);
} else {
err = -EPERM;
goto free;
@@ -890,3 +891,70 @@ free:
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ int vf,
+ struct mlx5_hca_vport_context *req)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
+ u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
+ int is_group_manager;
+ void *in;
+ int err;
+ void *ctx;
+
+ mlx5_core_dbg(dev, "vf %d\n", vf);
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ memset(out, 0, sizeof(out));
+ MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) > 1)
+ MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num);
+
+ ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context);
+ MLX5_SET(hca_vport_context, ctx, field_select, req->field_select);
+ MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware);
+ MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi);
+ MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw);
+ MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy);
+ MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state);
+ MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state);
+ MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
+ MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm);
+ MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2);
+ MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm);
+ MLX5_SET(hca_vport_context, ctx, lid, req->lid);
+ MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply);
+ MLX5_SET(hca_vport_context, ctx, lmc, req->lmc);
+ MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout);
+ MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid);
+ MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl);
+ MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
+ MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ if (err)
+ goto ex;
+
+ err = mlx5_cmd_status_to_err_v2(out);
+
+ex:
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig
index fd25078ee923..3e668d852f03 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/staging/rdma/hfi1/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_HFI1
tristate "Intel OPA Gen1 support"
- depends on X86_64
+ depends on X86_64 && INFINIBAND_RDMAVT
+ select MMU_NOTIFIER
default m
---help---
This is a low-level driver for Intel OPA Gen1 adapter.
@@ -25,13 +26,3 @@ config SDMA_VERBOSITY
---help---
This is a configuration flag to enable verbose
SDMA debug
-config PRESCAN_RXQ
- bool "Enable prescanning of the RX queue for ECNs"
- depends on INFINIBAND_HFI1
- default n
- ---help---
- This option toggles the prescanning of the receive queue for
- Explicit Congestion Notifications. If an ECN is detected, it
- is processed as quickly as possible, the ECN is toggled off.
- After the prescanning step, the receive queue is processed as
- usual.
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile
index 68c5a315e557..8dc59382ee96 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/staging/rdma/hfi1/Makefile
@@ -7,10 +7,12 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
-hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o efivar.o eprom.o file_ops.o firmware.o \
- init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \
- qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \
- uc.o ud.o user_pages.o user_sdma.o verbs_mcast.o verbs.o
+hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
+ eprom.o file_ops.o firmware.o \
+ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
+ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
+ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
+ verbs_txreq.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c
new file mode 100644
index 000000000000..2cb8ca77f876
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/topology.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+#include "trace.h"
+
+struct cpu_mask_set {
+ struct cpumask mask;
+ struct cpumask used;
+ uint gen;
+};
+
+struct hfi1_affinity {
+ struct cpu_mask_set def_intr;
+ struct cpu_mask_set rcv_intr;
+ struct cpu_mask_set proc;
+ /* spin lock to protect affinity struct */
+ spinlock_t lock;
+};
+
+/* Name of IRQ types, indexed by enum irq_type */
+static const char * const irq_type_names[] = {
+ "SDMA",
+ "RCVCTXT",
+ "GENERAL",
+ "OTHER",
+};
+
+static inline void init_cpu_mask_set(struct cpu_mask_set *set)
+{
+ cpumask_clear(&set->mask);
+ cpumask_clear(&set->used);
+ set->gen = 0;
+}
+
+/*
+ * Interrupt affinity.
+ *
+ * non-rcv avail gets a default mask that
+ * starts as possible cpus with threads reset
+ * and each rcv avail reset.
+ *
+ * rcv avail gets node relative 1 wrapping back
+ * to the node relative 1 as necessary.
+ *
+ */
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+{
+ int node = pcibus_to_node(dd->pcidev->bus);
+ struct hfi1_affinity *info;
+ const struct cpumask *local_mask;
+ int curr_cpu, possible, i, ht;
+
+ if (node < 0)
+ node = numa_node_id();
+ dd->node = node;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ spin_lock_init(&info->lock);
+
+ init_cpu_mask_set(&info->def_intr);
+ init_cpu_mask_set(&info->rcv_intr);
+ init_cpu_mask_set(&info->proc);
+
+ local_mask = cpumask_of_node(dd->node);
+ if (cpumask_first(local_mask) >= nr_cpu_ids)
+ local_mask = topology_core_cpumask(0);
+ /* use local mask as default */
+ cpumask_copy(&info->def_intr.mask, local_mask);
+ /*
+ * Remove HT cores from the default mask. Do this in two steps below.
+ */
+ possible = cpumask_weight(&info->def_intr.mask);
+ ht = cpumask_weight(topology_sibling_cpumask(
+ cpumask_first(&info->def_intr.mask)));
+ /*
+ * Step 1. Skip over the first N HT siblings and use them as the
+ * "real" cores. Assumes that HT cores are not enumerated in
+ * succession (except in the single core case).
+ */
+ curr_cpu = cpumask_first(&info->def_intr.mask);
+ for (i = 0; i < possible / ht; i++)
+ curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+ /*
+ * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
+ * skip any gaps.
+ */
+ for (; i < possible; i++) {
+ cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+ curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+ }
+
+ /* fill in the receive list */
+ possible = cpumask_weight(&info->def_intr.mask);
+ curr_cpu = cpumask_first(&info->def_intr.mask);
+ if (possible == 1) {
+ /* only one CPU, everyone will use it */
+ cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+ } else {
+ /*
+ * Retain the first CPU in the default list for the control
+ * context.
+ */
+ curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+ /*
+ * Remove the remaining kernel receive queues from
+ * the default list and add them to the receive list.
+ */
+ for (i = 0; i < dd->n_krcv_queues - 1; i++) {
+ cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+ cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+ curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+ if (curr_cpu >= nr_cpu_ids)
+ break;
+ }
+ }
+
+ cpumask_copy(&info->proc.mask, cpu_online_mask);
+ dd->affinity = info;
+ return 0;
+}
+
+void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
+{
+ kfree(dd->affinity);
+}
+
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+ int ret;
+ cpumask_var_t diff;
+ struct cpu_mask_set *set;
+ struct sdma_engine *sde = NULL;
+ struct hfi1_ctxtdata *rcd = NULL;
+ char extra[64];
+ int cpu = -1;
+
+ extra[0] = '\0';
+ cpumask_clear(&msix->mask);
+
+ ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ switch (msix->type) {
+ case IRQ_SDMA:
+ sde = (struct sdma_engine *)msix->arg;
+ scnprintf(extra, 64, "engine %u", sde->this_idx);
+ /* fall through */
+ case IRQ_GENERAL:
+ set = &dd->affinity->def_intr;
+ break;
+ case IRQ_RCVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ if (rcd->ctxt == HFI1_CTRL_CTXT) {
+ set = &dd->affinity->def_intr;
+ cpu = cpumask_first(&set->mask);
+ } else {
+ set = &dd->affinity->rcv_intr;
+ }
+ scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+ break;
+ default:
+ dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
+ return -EINVAL;
+ }
+
+ /*
+ * The control receive context is placed on a particular CPU, which
+ * is set above. Skip accounting for it. Everything else finds its
+ * CPU here.
+ */
+ if (cpu == -1) {
+ spin_lock(&dd->affinity->lock);
+ if (cpumask_equal(&set->mask, &set->used)) {
+ /*
+ * We've used up all the CPUs, bump up the generation
+ * and reset the 'used' map
+ */
+ set->gen++;
+ cpumask_clear(&set->used);
+ }
+ cpumask_andnot(diff, &set->mask, &set->used);
+ cpu = cpumask_first(diff);
+ cpumask_set_cpu(cpu, &set->used);
+ spin_unlock(&dd->affinity->lock);
+ }
+
+ switch (msix->type) {
+ case IRQ_SDMA:
+ sde->cpu = cpu;
+ break;
+ case IRQ_GENERAL:
+ case IRQ_RCVCTXT:
+ case IRQ_OTHER:
+ break;
+ }
+
+ cpumask_set_cpu(cpu, &msix->mask);
+ dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
+ msix->msix.vector, irq_type_names[msix->type],
+ extra, cpu);
+ irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+ free_cpumask_var(diff);
+ return 0;
+}
+
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix)
+{
+ struct cpu_mask_set *set = NULL;
+ struct hfi1_ctxtdata *rcd;
+
+ switch (msix->type) {
+ case IRQ_SDMA:
+ case IRQ_GENERAL:
+ set = &dd->affinity->def_intr;
+ break;
+ case IRQ_RCVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ /* only do accounting for non control contexts */
+ if (rcd->ctxt != HFI1_CTRL_CTXT)
+ set = &dd->affinity->rcv_intr;
+ break;
+ default:
+ return;
+ }
+
+ if (set) {
+ spin_lock(&dd->affinity->lock);
+ cpumask_andnot(&set->used, &set->used, &msix->mask);
+ if (cpumask_empty(&set->used) && set->gen) {
+ set->gen--;
+ cpumask_copy(&set->used, &set->mask);
+ }
+ spin_unlock(&dd->affinity->lock);
+ }
+
+ irq_set_affinity_hint(msix->msix.vector, NULL);
+ cpumask_clear(&msix->mask);
+}
+
+int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
+{
+ int cpu = -1, ret;
+ cpumask_var_t diff, mask, intrs;
+ const struct cpumask *node_mask,
+ *proc_mask = tsk_cpus_allowed(current);
+ struct cpu_mask_set *set = &dd->affinity->proc;
+ char buf[1024];
+
+ /*
+ * check whether process/context affinity has already
+ * been set
+ */
+ if (cpumask_weight(proc_mask) == 1) {
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+ hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
+ current->pid, current->comm, buf);
+ /*
+ * Mark the pre-set CPU as used. This is atomic so we don't
+ * need the lock
+ */
+ cpu = cpumask_first(proc_mask);
+ cpumask_set_cpu(cpu, &set->used);
+ goto done;
+ } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+ hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
+ current->pid, current->comm, buf);
+ goto done;
+ }
+
+ /*
+ * The process does not have a preset CPU affinity so find one to
+ * recommend. We prefer CPUs on the same NUMA as the device.
+ */
+
+ ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+ if (!ret)
+ goto done;
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ goto free_diff;
+ ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
+ if (!ret)
+ goto free_mask;
+
+ spin_lock(&dd->affinity->lock);
+ /*
+ * If we've used all available CPUs, clear the mask and start
+ * overloading.
+ */
+ if (cpumask_equal(&set->mask, &set->used)) {
+ set->gen++;
+ cpumask_clear(&set->used);
+ }
+
+ /* CPUs used by interrupt handlers */
+ cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
+ &dd->affinity->def_intr.mask :
+ &dd->affinity->def_intr.used));
+ cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
+ &dd->affinity->rcv_intr.mask :
+ &dd->affinity->rcv_intr.used));
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
+ hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
+
+ /*
+ * If we don't have a NUMA node requested, preference is towards
+ * device NUMA node
+ */
+ if (node == -1)
+ node = dd->node;
+ node_mask = cpumask_of_node(node);
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
+ hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
+
+ /* diff will hold all unused cpus */
+ cpumask_andnot(diff, &set->mask, &set->used);
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
+ hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);
+
+ /* get cpumask of available CPUs on preferred NUMA */
+ cpumask_and(mask, diff, node_mask);
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+ hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
+
+ /*
+ * At first, we don't want to place processes on the same
+ * CPUs as interrupt handlers.
+ */
+ cpumask_andnot(diff, mask, intrs);
+ if (!cpumask_empty(diff))
+ cpumask_copy(mask, diff);
+
+ /*
+ * if we don't have a cpu on the preferred NUMA, get
+ * the list of the remaining available CPUs
+ */
+ if (cpumask_empty(mask)) {
+ cpumask_andnot(diff, &set->mask, &set->used);
+ cpumask_andnot(mask, diff, node_mask);
+ }
+ scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+ hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
+
+ cpu = cpumask_first(mask);
+ if (cpu >= nr_cpu_ids) /* empty */
+ cpu = -1;
+ else
+ cpumask_set_cpu(cpu, &set->used);
+ spin_unlock(&dd->affinity->lock);
+
+ free_cpumask_var(intrs);
+free_mask:
+ free_cpumask_var(mask);
+free_diff:
+ free_cpumask_var(diff);
+done:
+ return cpu;
+}
+
+void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
+{
+ struct cpu_mask_set *set = &dd->affinity->proc;
+
+ if (cpu < 0)
+ return;
+ spin_lock(&dd->affinity->lock);
+ cpumask_clear_cpu(cpu, &set->used);
+ if (cpumask_empty(&set->used) && set->gen) {
+ set->gen--;
+ cpumask_copy(&set->used, &set->mask);
+ }
+ spin_unlock(&dd->affinity->lock);
+}
+
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h
new file mode 100644
index 000000000000..b287e4963024
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_AFFINITY_H
+#define _HFI1_AFFINITY_H
+
+#include "hfi.h"
+
+enum irq_type {
+ IRQ_SDMA,
+ IRQ_RCVCTXT,
+ IRQ_GENERAL,
+ IRQ_OTHER
+};
+
+/* Can be used for both memory and cpu */
+enum affinity_flags {
+ AFF_AUTO,
+ AFF_NUMA_LOCAL,
+ AFF_DEV_LOCAL,
+ AFF_IRQ_LOCAL
+};
+
+struct hfi1_msix_entry;
+
+/* Initialize driver affinity data */
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
+/* Free driver affinity data */
+void hfi1_dev_affinity_free(struct hfi1_devdata *);
+/*
+ * Set IRQ affinity to a CPU. The function will determine the
+ * CPU and set the affinity to it.
+ */
+int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Remove the IRQ's CPU affinity. This function also updates
+ * any internal CPU tracking data
+ */
+void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Determine a CPU affinity for a user process, if the process does not
+ * have an affinity set yet.
+ */
+int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
+/* Release a CPU used by a user process. */
+void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
+
+#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h
new file mode 100644
index 000000000000..0d58fe3b49b5
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/aspm.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _ASPM_H
+#define _ASPM_H
+
+#include "hfi.h"
+
+extern uint aspm_mode;
+
+enum aspm_mode {
+ ASPM_MODE_DISABLED = 0, /* ASPM always disabled, performance mode */
+ ASPM_MODE_ENABLED = 1, /* ASPM always enabled, power saving mode */
+ ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */
+};
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
+#define ASPM_L1_SUPPORTED(reg) \
+ (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+ u32 up, dn;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return false;
+
+ pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+ dn = ASPM_L1_SUPPORTED(dn);
+
+ pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+ up = ASPM_L1_SUPPORTED(up);
+
+ /* ASPM works on A-step but is reported as not supported */
+ return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+ u32 l1_ent_lat = 0x4u;
+ u32 reg32;
+
+ pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+ reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+ reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+ pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return;
+
+ /* Enable ASPM L1 first in upstream component and then downstream */
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /* Disable ASPM L1 first in downstream component and then upstream */
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+ if (parent)
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static inline void aspm_enable(struct hfi1_devdata *dd)
+{
+ if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+ !dd->aspm_supported)
+ return;
+
+ aspm_hw_enable_l1(dd);
+ dd->aspm_enabled = true;
+}
+
+static inline void aspm_disable(struct hfi1_devdata *dd)
+{
+ if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+ return;
+
+ aspm_hw_disable_l1(dd);
+ dd->aspm_enabled = false;
+}
+
+static inline void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ aspm_disable(dd);
+ atomic_inc(&dd->aspm_disabled_cnt);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static inline void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+ aspm_enable(dd);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+ bool restart_timer;
+ bool close_interrupts;
+ unsigned long flags;
+ ktime_t now, prev;
+
+ /* Quickest exit for minimum impact */
+ if (!rcd->aspm_intr_supported)
+ return;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ /* PSM contexts are open */
+ if (!rcd->aspm_intr_enable)
+ goto unlock;
+
+ prev = rcd->aspm_ts_last_intr;
+ now = ktime_get();
+ rcd->aspm_ts_last_intr = now;
+
+ /* An interrupt pair close together in time */
+ close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+ /* Don't push out our timer till this much time has elapsed */
+ restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+ ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+ restart_timer = restart_timer && close_interrupts;
+
+ /* Disable ASPM and schedule timer */
+ if (rcd->aspm_enabled && close_interrupts) {
+ aspm_disable_inc(rcd->dd);
+ rcd->aspm_enabled = false;
+ restart_timer = true;
+ }
+
+ if (restart_timer) {
+ mod_timer(&rcd->aspm_timer,
+ jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+ rcd->aspm_ts_timer_sched = now;
+ }
+unlock:
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static inline void aspm_ctx_timer_function(unsigned long data)
+{
+ struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ aspm_enable_dec(rcd->dd);
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+static inline void aspm_disable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ unsigned i;
+
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ rcd = dd->rcd[i];
+ del_timer_sync(&rcd->aspm_timer);
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = false;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ }
+
+ aspm_disable(dd);
+ atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+static inline void aspm_enable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ unsigned i;
+
+ aspm_enable(dd);
+
+ if (aspm_mode != ASPM_MODE_DYNAMIC)
+ return;
+
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ rcd = dd->rcd[i];
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = true;
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ }
+}
+
+static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+ spin_lock_init(&rcd->aspm_lock);
+ setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function,
+ (unsigned long)rcd);
+ rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+ aspm_mode == ASPM_MODE_DYNAMIC &&
+ rcd->ctxt < rcd->dd->first_user_ctxt;
+}
+
+static inline void aspm_init(struct hfi1_devdata *dd)
+{
+ unsigned i;
+
+ spin_lock_init(&dd->aspm_lock);
+ dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+ for (i = 0; i < dd->first_user_ctxt; i++)
+ aspm_ctx_init(dd->rcd[i]);
+
+ /* Start with ASPM disabled */
+ aspm_hw_set_l1_ent_latency(dd);
+ dd->aspm_enabled = false;
+ aspm_hw_disable_l1(dd);
+
+ /* Now turn on ASPM if configured */
+ aspm_enable_all(dd);
+}
+
+static inline void aspm_exit(struct hfi1_devdata *dd)
+{
+ aspm_disable_all(dd);
+
+ /* Turn on ASPM on exit to conserve power */
+ aspm_enable(dd);
+}
+
+#endif /* _ASPM_H */
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index 46a1830b509b..16eb653903e0 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -64,6 +61,8 @@
#include "sdma.h"
#include "eprom.h"
#include "efivar.h"
+#include "platform.h"
+#include "aspm.h"
#define NUM_IB_PORTS 1
@@ -420,10 +419,10 @@ static struct flag_table pio_err_status_flags[] = {
SEC_SPC_FREEZE,
SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
/*23*/ FLAG_ENTRY("PioWriteQwValidParity",
- SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+ SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
/*24*/ FLAG_ENTRY("PioBlockQwCountParity",
- SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+ SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
/*25*/ FLAG_ENTRY("PioVlfVlLenParity",
SEC_SPC_FREEZE,
@@ -509,6 +508,12 @@ static struct flag_table sdma_err_status_flags[] = {
| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
+/* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */
+#define PORT_DISCARD_EGRESS_ERRS \
+ (SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \
+ | SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \
+ | SEND_EGRESS_ERR_INFO_VL_ERR_SMASK)
+
/*
* TXE Egress Error flags
*/
@@ -936,7 +941,7 @@ static struct flag_table dc8051_err_flags[] = {
FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
- D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
+ D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
};
@@ -950,7 +955,7 @@ static struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME),
FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET),
FLAG_ENTRY0("Serdes internal loopback failure",
- FAILED_SERDES_INTERNAL_LOOPBACK),
+ FAILED_SERDES_INTERNAL_LOOPBACK),
FLAG_ENTRY0("Failed SerDes init", FAILED_SERDES_INIT),
FLAG_ENTRY0("Failed LNI(Polling)", FAILED_LNI_POLLING),
FLAG_ENTRY0("Failed LNI(Debounce)", FAILED_LNI_DEBOUNCE),
@@ -958,7 +963,8 @@ static struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Failed LNI(OptEq)", FAILED_LNI_OPTEQ),
FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
- FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT)
+ FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT),
+ FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT)
};
/*
@@ -978,7 +984,6 @@ static struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Link going down", 0x0100),
};
-
static u32 encoded_size(u32 size);
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
@@ -1140,11 +1145,8 @@ struct cntr_entry {
/*
* accessor for stat element, context either dd or ppd
*/
- u64 (*rw_cntr)(const struct cntr_entry *,
- void *context,
- int vl,
- int mode,
- u64 data);
+ u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl,
+ int mode, u64 data);
};
#define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
@@ -1188,7 +1190,7 @@ CNTR_ELEM(#name, \
#define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
#define OVR_ELM(ctx) \
CNTR_ELEM("RcvHdrOvr" #ctx, \
- (RCV_HDR_OVFL_CNT + ctx*0x100), \
+ (RCV_HDR_OVFL_CNT + ctx * 0x100), \
0, CNTR_NORMAL, port_access_u64_csr)
/* 32bit TXE */
@@ -1274,7 +1276,6 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
{
u64 ret;
-
if (mode == CNTR_MODE_R) {
ret = read_csr(dd, csr);
} else if (mode == CNTR_MODE_W) {
@@ -1291,17 +1292,65 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
/* Dev Access */
static u64 dev_access_u32_csr(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode, u64 data)
{
struct hfi1_devdata *dd = context;
+ u64 csr = entry->csr;
- if (vl != CNTR_INVALID_VL)
- return 0;
- return read_write_csr(dd, entry->csr, mode, data);
+ if (entry->flags & CNTR_SDMA) {
+ if (vl == CNTR_INVALID_VL)
+ return 0;
+ csr += 0x100 * vl;
+ } else {
+ if (vl != CNTR_INVALID_VL)
+ return 0;
+ }
+ return read_write_csr(dd, csr, mode, data);
+}
+
+static u64 access_sde_err_cnt(const struct cntr_entry *entry,
+ void *context, int idx, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ if (dd->per_sdma && idx < dd->num_sdma)
+ return dd->per_sdma[idx].err_cnt;
+ return 0;
+}
+
+static u64 access_sde_int_cnt(const struct cntr_entry *entry,
+ void *context, int idx, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ if (dd->per_sdma && idx < dd->num_sdma)
+ return dd->per_sdma[idx].sdma_int_cnt;
+ return 0;
+}
+
+static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry,
+ void *context, int idx, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ if (dd->per_sdma && idx < dd->num_sdma)
+ return dd->per_sdma[idx].idle_int_cnt;
+ return 0;
+}
+
+static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry,
+ void *context, int idx, int mode,
+ u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ if (dd->per_sdma && idx < dd->num_sdma)
+ return dd->per_sdma[idx].progress_int_cnt;
+ return 0;
}
static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
- int vl, int mode, u64 data)
+ int vl, int mode, u64 data)
{
struct hfi1_devdata *dd = context;
@@ -1322,7 +1371,7 @@ static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
}
static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
- int vl, int mode, u64 data)
+ int vl, int mode, u64 data)
{
struct hfi1_devdata *dd = context;
u32 csr = entry->csr;
@@ -1346,7 +1395,7 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
/* Port Access */
static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
- int vl, int mode, u64 data)
+ int vl, int mode, u64 data)
{
struct hfi1_pportdata *ppd = context;
@@ -1356,7 +1405,7 @@ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
}
static u64 port_access_u64_csr(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode, u64 data)
{
struct hfi1_pportdata *ppd = context;
u64 val;
@@ -1396,7 +1445,7 @@ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
}
static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
- int vl, int mode, u64 data)
+ int vl, int mode, u64 data)
{
struct hfi1_pportdata *ppd = context;
@@ -1406,7 +1455,7 @@ static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
}
static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
- int vl, int mode, u64 data)
+ int vl, int mode, u64 data)
{
struct hfi1_pportdata *ppd = context;
@@ -1427,18 +1476,25 @@ static u64 access_sw_unknown_frame_cnt(const struct cntr_entry *entry,
}
static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode, u64 data)
{
- struct hfi1_pportdata *ppd = context;
+ struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
+ u64 zero = 0;
+ u64 *counter;
- if (vl != CNTR_INVALID_VL)
- return 0;
+ if (vl == CNTR_INVALID_VL)
+ counter = &ppd->port_xmit_discards;
+ else if (vl >= 0 && vl < C_VL_COUNT)
+ counter = &ppd->port_xmit_discards_vl[vl];
+ else
+ counter = &zero;
- return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
+ return read_write_sw(ppd->dd, counter, mode, data);
}
static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode,
+ u64 data)
{
struct hfi1_pportdata *ppd = context;
@@ -1450,7 +1506,7 @@ static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
}
static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode, u64 data)
{
struct hfi1_pportdata *ppd = context;
@@ -1475,7 +1531,6 @@ static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
u64 __percpu *cntr,
int vl, int mode, u64 data)
{
-
u64 ret = 0;
if (vl != CNTR_INVALID_VL)
@@ -1507,7 +1562,7 @@ static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
}
static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode, u64 data)
{
struct hfi1_devdata *dd = context;
@@ -1523,6 +1578,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry,
return dd->verbs_dev.n_piowait;
}
+static u64 access_sw_pio_drain(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ return dd->verbs_dev.n_piodrain;
+}
+
static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data)
{
@@ -1540,11 +1603,12 @@ static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
}
static u64 access_sw_send_schedule(const struct cntr_entry *entry,
- void *context, int vl, int mode, u64 data)
+ void *context, int vl, int mode, u64 data)
{
struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
- return dd->verbs_dev.n_send_schedule;
+ return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl,
+ mode, data);
}
/* Software counters for the error status bits within MISC_ERR_STATUS */
@@ -3882,8 +3946,8 @@ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \
void *context, int vl, int mode, u64 data) \
{ \
struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \
- return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr, \
- ppd->ibport_data.cntr, vl, \
+ return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr, \
+ ppd->ibport_data.rvp.cntr, vl, \
mode, data); \
}
@@ -3900,7 +3964,7 @@ static u64 access_ibp_##cntr(const struct cntr_entry *entry, \
if (vl != CNTR_INVALID_VL) \
return 0; \
\
- return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr, \
+ return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr, \
mode, data); \
}
@@ -4063,10 +4127,28 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
access_sw_pio_wait),
+[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
+ access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
access_sw_send_schedule),
+[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
+ SEND_DMA_DESC_FETCHED_CNT, 0,
+ CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+ dev_access_u32_csr),
+[C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0,
+ CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+ access_sde_int_cnt),
+[C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0,
+ CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+ access_sde_err_cnt),
+[C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0,
+ CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+ access_sde_idle_int_cnt),
+[C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0,
+ CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+ access_sde_progress_int_cnt),
/* MISC_ERR_STATUS */
[C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0,
CNTR_NORMAL,
@@ -4876,28 +4958,28 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
[C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
[C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
[C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
- CNTR_SYNTH | CNTR_VL),
+ CNTR_SYNTH | CNTR_VL),
[C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
- CNTR_SYNTH | CNTR_VL),
+ CNTR_SYNTH | CNTR_VL),
[C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
- CNTR_SYNTH | CNTR_VL),
+ CNTR_SYNTH | CNTR_VL),
[C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
[C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
[C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
- access_sw_link_dn_cnt),
+ access_sw_link_dn_cnt),
[C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
- access_sw_link_up_cnt),
+ access_sw_link_up_cnt),
[C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL,
access_sw_unknown_frame_cnt),
[C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
- access_sw_xmit_discards),
+ access_sw_xmit_discards),
[C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
- CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
- access_sw_xmit_discards),
+ CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
+ access_sw_xmit_discards),
[C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
- access_xmit_constraint_errs),
+ access_xmit_constraint_errs),
[C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
- access_rcv_constraint_errs),
+ access_rcv_constraint_errs),
[C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
[C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
[C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
@@ -4913,9 +4995,9 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
access_sw_cpu_rc_acks),
[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
- access_sw_cpu_rc_qacks),
+ access_sw_cpu_rc_qacks),
[C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
- access_sw_cpu_rc_delayed_comp),
+ access_sw_cpu_rc_delayed_comp),
[OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
[OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
[OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
@@ -5064,7 +5146,7 @@ done:
* the buffer. End in '*' if the buffer is too short.
*/
static char *flag_string(char *buf, int buf_len, u64 flags,
- struct flag_table *table, int table_size)
+ struct flag_table *table, int table_size)
{
char extra[32];
char *p = buf;
@@ -5125,10 +5207,8 @@ static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
if (source < ARRAY_SIZE(cce_misc_names))
strncpy(buf, cce_misc_names[source], bsize);
else
- snprintf(buf,
- bsize,
- "Reserved%u",
- source + IS_GENERAL_ERR_START);
+ snprintf(buf, bsize, "Reserved%u",
+ source + IS_GENERAL_ERR_START);
return buf;
}
@@ -5167,7 +5247,7 @@ static char *is_various_name(char *buf, size_t bsize, unsigned int source)
if (source < ARRAY_SIZE(various_names))
strncpy(buf, various_names[source], bsize);
else
- snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
+ snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
return buf;
}
@@ -5252,51 +5332,56 @@ static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
+ cce_err_status_flags,
+ ARRAY_SIZE(cce_err_status_flags));
}
static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
+ rxe_err_status_flags,
+ ARRAY_SIZE(rxe_err_status_flags));
}
static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags, misc_err_status_flags,
- ARRAY_SIZE(misc_err_status_flags));
+ ARRAY_SIZE(misc_err_status_flags));
}
static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
+ pio_err_status_flags,
+ ARRAY_SIZE(pio_err_status_flags));
}
static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- sdma_err_status_flags,
- ARRAY_SIZE(sdma_err_status_flags));
+ sdma_err_status_flags,
+ ARRAY_SIZE(sdma_err_status_flags));
}
static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
+ egress_err_status_flags,
+ ARRAY_SIZE(egress_err_status_flags));
}
static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
+ egress_err_info_flags,
+ ARRAY_SIZE(egress_err_info_flags));
}
static char *send_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- send_err_status_flags,
- ARRAY_SIZE(send_err_status_flags));
+ send_err_status_flags,
+ ARRAY_SIZE(send_err_status_flags));
}
static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -5309,7 +5394,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
* report or record it.
*/
dd_dev_info(dd, "CCE Error: %s\n",
- cce_err_status_string(buf, sizeof(buf), reg));
+ cce_err_status_string(buf, sizeof(buf), reg));
if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
@@ -5339,14 +5424,14 @@ static void update_rcverr_timer(unsigned long opaque)
u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
- ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
+ ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
- set_link_down_reason(ppd,
- OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
- OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
+ set_link_down_reason(
+ ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
+ OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
}
- dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
+ dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
}
@@ -5372,7 +5457,7 @@ static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
int i = 0;
dd_dev_info(dd, "Receive Error: %s\n",
- rxe_err_status_string(buf, sizeof(buf), reg));
+ rxe_err_status_string(buf, sizeof(buf), reg));
if (reg & ALL_RXE_FREEZE_ERR) {
int flags = 0;
@@ -5399,7 +5484,7 @@ static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
int i = 0;
dd_dev_info(dd, "Misc Error: %s",
- misc_err_status_string(buf, sizeof(buf), reg));
+ misc_err_status_string(buf, sizeof(buf), reg));
for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) {
if (reg & (1ull << i))
incr_cntr64(&dd->misc_err_status_cnt[i]);
@@ -5412,7 +5497,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
int i = 0;
dd_dev_info(dd, "PIO Error: %s\n",
- pio_err_status_string(buf, sizeof(buf), reg));
+ pio_err_status_string(buf, sizeof(buf), reg));
if (reg & ALL_PIO_FREEZE_ERR)
start_freeze_handling(dd->pport, 0);
@@ -5429,7 +5514,7 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
int i = 0;
dd_dev_info(dd, "SDMA Error: %s\n",
- sdma_err_status_string(buf, sizeof(buf), reg));
+ sdma_err_status_string(buf, sizeof(buf), reg));
if (reg & ALL_SDMA_FREEZE_ERR)
start_freeze_handling(dd->pport, 0);
@@ -5440,12 +5525,14 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
}
-static void count_port_inactive(struct hfi1_devdata *dd)
+static inline void __count_port_discards(struct hfi1_pportdata *ppd)
{
- struct hfi1_pportdata *ppd = dd->pport;
+ incr_cntr64(&ppd->port_xmit_discards);
+}
- if (ppd->port_xmit_discards < ~(u64)0)
- ppd->port_xmit_discards++;
+static void count_port_inactive(struct hfi1_devdata *dd)
+{
+ __count_port_discards(dd->pport);
}
/*
@@ -5457,7 +5544,8 @@ static void count_port_inactive(struct hfi1_devdata *dd)
* egress error if more than one packet fails the same integrity check
* since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
*/
-static void handle_send_egress_err_info(struct hfi1_devdata *dd)
+static void handle_send_egress_err_info(struct hfi1_devdata *dd,
+ int vl)
{
struct hfi1_pportdata *ppd = dd->pport;
u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
@@ -5468,14 +5556,44 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd)
write_csr(dd, SEND_EGRESS_ERR_INFO, info);
dd_dev_info(dd,
- "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
- info, egress_err_info_string(buf, sizeof(buf), info), src);
+ "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
+ info, egress_err_info_string(buf, sizeof(buf), info), src);
/* Eventually add other counters for each bit */
+ if (info & PORT_DISCARD_EGRESS_ERRS) {
+ int weight, i;
- if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
- if (ppd->port_xmit_discards < ~(u64)0)
- ppd->port_xmit_discards++;
+ /*
+ * Count all applicable bits as individual errors and
+ * attribute them to the packet that triggered this handler.
+ * This may not be completely accurate due to limitations
+ * on the available hardware error information. There is
+ * a single information register and any number of error
+ * packets may have occurred and contributed to it before
+ * this routine is called. This means that:
+ * a) If multiple packets with the same error occur before
+ * this routine is called, earlier packets are missed.
+ * There is only a single bit for each error type.
+ * b) Errors may not be attributed to the correct VL.
+ * The driver is attributing all bits in the info register
+ * to the packet that triggered this call, but bits
+ * could be an accumulation of different packets with
+ * different VLs.
+ * c) A single error packet may have multiple counts attached
+ * to it. There is no way for the driver to know if
+ * multiple bits set in the info register are due to a
+ * single packet or multiple packets. The driver assumes
+ * multiple packets.
+ */
+ weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS);
+ for (i = 0; i < weight; i++) {
+ __count_port_discards(ppd);
+ if (vl >= 0 && vl < TXE_NUM_DATA_VL)
+ incr_cntr64(&ppd->port_xmit_discards_vl[vl]);
+ else if (vl == 15)
+ incr_cntr64(&ppd->port_xmit_discards_vl
+ [C_VL_15]);
+ }
}
}
@@ -5493,12 +5611,71 @@ static inline int port_inactive_err(u64 posn)
* Input value is a bit position within the SEND_EGRESS_ERR_STATUS
* register. Does it represent a 'disallowed packet' error?
*/
-static inline int disallowed_pkt_err(u64 posn)
+static inline int disallowed_pkt_err(int posn)
{
return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
}
+/*
+ * Input value is a bit position of one of the SDMA engine disallowed
+ * packet errors. Return which engine. Use of this must be guarded by
+ * disallowed_pkt_err().
+ */
+static inline int disallowed_pkt_engine(int posn)
+{
+ return posn - SEES(TX_SDMA0_DISALLOWED_PACKET);
+}
+
+/*
+ * Translate an SDMA engine to a VL. Return -1 if the tranlation cannot
+ * be done.
+ */
+static int engine_to_vl(struct hfi1_devdata *dd, int engine)
+{
+ struct sdma_vl_map *m;
+ int vl;
+
+ /* range check */
+ if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES)
+ return -1;
+
+ rcu_read_lock();
+ m = rcu_dereference(dd->sdma_map);
+ vl = m->engine_to_vl[engine];
+ rcu_read_unlock();
+
+ return vl;
+}
+
+/*
+ * Translate the send context (sofware index) into a VL. Return -1 if the
+ * translation cannot be done.
+ */
+static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
+{
+ struct send_context_info *sci;
+ struct send_context *sc;
+ int i;
+
+ sci = &dd->send_contexts[sw_index];
+
+ /* there is no information for user (PSM) and ack contexts */
+ if (sci->type != SC_KERNEL)
+ return -1;
+
+ sc = sci->sc;
+ if (!sc)
+ return -1;
+ if (dd->vld[15].sc == sc)
+ return 15;
+ for (i = 0; i < num_vls; i++)
+ if (dd->vld[i].sc == sc)
+ return i;
+
+ return -1;
+}
+
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
{
u64 reg_copy = reg, handled = 0;
@@ -5507,34 +5684,34 @@ static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
start_freeze_handling(dd->pport, 0);
- if (is_ax(dd) && (reg &
- SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
- && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
+ else if (is_ax(dd) &&
+ (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) &&
+ (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
start_freeze_handling(dd->pport, 0);
while (reg_copy) {
int posn = fls64(reg_copy);
- /*
- * fls64() returns a 1-based offset, but we generally
- * want 0-based offsets.
- */
+ /* fls64() returns a 1-based offset, we want it zero based */
int shift = posn - 1;
+ u64 mask = 1ULL << shift;
if (port_inactive_err(shift)) {
count_port_inactive(dd);
- handled |= (1ULL << shift);
+ handled |= mask;
} else if (disallowed_pkt_err(shift)) {
- handle_send_egress_err_info(dd);
- handled |= (1ULL << shift);
+ int vl = engine_to_vl(dd, disallowed_pkt_engine(shift));
+
+ handle_send_egress_err_info(dd, vl);
+ handled |= mask;
}
- clear_bit(shift, (unsigned long *)&reg_copy);
+ reg_copy &= ~mask;
}
reg &= ~handled;
if (reg)
dd_dev_info(dd, "Egress Error: %s\n",
- egress_err_status_string(buf, sizeof(buf), reg));
+ egress_err_status_string(buf, sizeof(buf), reg));
for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) {
if (reg & (1ull << i))
@@ -5548,7 +5725,7 @@ static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
int i = 0;
dd_dev_info(dd, "Send Error: %s\n",
- send_err_status_string(buf, sizeof(buf), reg));
+ send_err_status_string(buf, sizeof(buf), reg));
for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) {
if (reg & (1ull << i))
@@ -5594,7 +5771,7 @@ static void interrupt_clear_down(struct hfi1_devdata *dd,
u64 mask;
dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
- eri->desc, reg);
+ eri->desc, reg);
/*
* Read-modify-write so any other masked bits
* remain masked.
@@ -5618,14 +5795,15 @@ static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
interrupt_clear_down(dd, 0, eri);
} else {
dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
- source);
+ source);
}
}
static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
{
return flag_string(buf, buf_len, flags,
- sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
+ sc_err_status_flags,
+ ARRAY_SIZE(sc_err_status_flags));
}
/*
@@ -5650,15 +5828,15 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
sw_index = dd->hw_to_sw[hw_context];
if (sw_index >= dd->num_send_contexts) {
dd_dev_err(dd,
- "out of range sw index %u for send context %u\n",
- sw_index, hw_context);
+ "out of range sw index %u for send context %u\n",
+ sw_index, hw_context);
return;
}
sci = &dd->send_contexts[sw_index];
sc = sci->sc;
if (!sc) {
dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
- sw_index, hw_context);
+ sw_index, hw_context);
return;
}
@@ -5668,10 +5846,11 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
- send_context_err_status_string(flags, sizeof(flags), status));
+ send_context_err_status_string(flags, sizeof(flags),
+ status));
if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
- handle_send_egress_err_info(dd);
+ handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index));
/*
* Automatically restart halted kernel contexts out of interrupt
@@ -5704,6 +5883,7 @@ static void handle_sdma_eng_err(struct hfi1_devdata *dd,
dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
sde->this_idx, source, (unsigned long long)status);
#endif
+ sde->err_cnt++;
sdma_engine_error(sde, status);
/*
@@ -5752,23 +5932,22 @@ static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
interrupt_clear_down(dd, 0, eri);
else
dd_dev_info(dd,
- "%s: Unimplemented/reserved interrupt %d\n",
- __func__, source);
+ "%s: Unimplemented/reserved interrupt %d\n",
+ __func__, source);
}
static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
{
- /* source is always zero */
+ /* src_ctx is always zero */
struct hfi1_pportdata *ppd = dd->pport;
unsigned long flags;
u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
if (reg & QSFP_HFI0_MODPRST_N) {
-
- dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
- __func__);
-
if (!qsfp_mod_present(ppd)) {
+ dd_dev_info(dd, "%s: QSFP module removed\n",
+ __func__);
+
ppd->driver_link_ready = 0;
/*
* Cable removed, reset all our information about the
@@ -5781,14 +5960,23 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
* an interrupt when a cable is inserted
*/
ppd->qsfp_info.cache_valid = 0;
- ppd->qsfp_info.qsfp_interrupt_functional = 0;
+ ppd->qsfp_info.reset_needed = 0;
+ ppd->qsfp_info.limiting_active = 0;
spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
- flags);
- write_csr(dd,
- dd->hfi1_id ?
- ASIC_QSFP2_INVERT :
- ASIC_QSFP1_INVERT,
- qsfp_int_mgmt);
+ flags);
+ /* Invert the ModPresent pin now to detect plug-in */
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+ ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+ if ((ppd->offline_disabled_reason >
+ HFI1_ODR_MASK(
+ OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) ||
+ (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(
+ OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+
if (ppd->host_link_state == HLS_DN_POLL) {
/*
* The link is still in POLL. This means
@@ -5799,28 +5987,33 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
queue_work(ppd->hfi1_wq, &ppd->link_down_work);
}
} else {
+ dd_dev_info(dd, "%s: QSFP module inserted\n",
+ __func__);
+
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.cache_valid = 0;
ppd->qsfp_info.cache_refresh_required = 1;
spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
- flags);
+ flags);
+ /*
+ * Stop inversion of ModPresent pin to detect
+ * removal of the cable
+ */
qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
- write_csr(dd,
- dd->hfi1_id ?
- ASIC_QSFP2_INVERT :
- ASIC_QSFP1_INVERT,
- qsfp_int_mgmt);
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+ ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
}
}
if (reg & QSFP_HFI0_INT_N) {
-
- dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
- __func__);
+ dd_dev_info(dd, "%s: Interrupt received from QSFP module\n",
+ __func__);
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.check_interrupt_flags = 1;
- ppd->qsfp_info.qsfp_interrupt_functional = 1;
spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
}
@@ -5834,11 +6027,11 @@ static int request_host_lcb_access(struct hfi1_devdata *dd)
int ret;
ret = do_8051_command(dd, HCMD_MISC,
- (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
- NULL);
+ (u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
+ LOAD_DATA_FIELD_ID_SHIFT, NULL);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd, "%s: command failed with error %d\n",
- __func__, ret);
+ __func__, ret);
}
return ret == HCMD_SUCCESS ? 0 : -EBUSY;
}
@@ -5848,11 +6041,11 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd)
int ret;
ret = do_8051_command(dd, HCMD_MISC,
- (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
- NULL);
+ (u64)HCMD_MISC_GRANT_LCB_ACCESS <<
+ LOAD_DATA_FIELD_ID_SHIFT, NULL);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd, "%s: command failed with error %d\n",
- __func__, ret);
+ __func__, ret);
}
return ret == HCMD_SUCCESS ? 0 : -EBUSY;
}
@@ -5864,8 +6057,8 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd)
static inline void set_host_lcb_access(struct hfi1_devdata *dd)
{
write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
- DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
- | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
+ DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK |
+ DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
}
/*
@@ -5875,7 +6068,7 @@ static inline void set_host_lcb_access(struct hfi1_devdata *dd)
static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
{
write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
- DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
+ DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
}
/*
@@ -5909,7 +6102,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
/* this access is valid only when the link is up */
if ((ppd->host_link_state & HLS_UP) == 0) {
dd_dev_info(dd, "%s: link state %s not up\n",
- __func__, link_state_name(ppd->host_link_state));
+ __func__, link_state_name(ppd->host_link_state));
ret = -EBUSY;
goto done;
}
@@ -5918,8 +6111,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
ret = request_host_lcb_access(dd);
if (ret) {
dd_dev_err(dd,
- "%s: unable to acquire LCB access, err %d\n",
- __func__, ret);
+ "%s: unable to acquire LCB access, err %d\n",
+ __func__, ret);
goto done;
}
set_host_lcb_access(dd);
@@ -5956,7 +6149,7 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
if (dd->lcb_access_count == 0) {
dd_dev_err(dd, "%s: LCB access count is zero. Skipping.\n",
- __func__);
+ __func__);
goto done;
}
@@ -5965,8 +6158,8 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
ret = request_8051_lcb_access(dd);
if (ret) {
dd_dev_err(dd,
- "%s: unable to release LCB access, err %d\n",
- __func__, ret);
+ "%s: unable to release LCB access, err %d\n",
+ __func__, ret);
/* restore host access if the grant didn't work */
set_host_lcb_access(dd);
goto done;
@@ -5998,19 +6191,26 @@ static void init_lcb_access(struct hfi1_devdata *dd)
static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
{
write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
- DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
- | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
- | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
+ DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK |
+ (u64)return_code <<
+ DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT |
+ (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
}
/*
- * Handle requests from the 8051.
+ * Handle host requests from the 8051.
+ *
+ * This is a work-queue function outside of the interrupt.
*/
-static void handle_8051_request(struct hfi1_devdata *dd)
+void handle_8051_request(struct work_struct *work)
{
+ struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+ dc_host_req_work);
+ struct hfi1_devdata *dd = ppd->dd;
u64 reg;
- u16 data;
- u8 type;
+ u16 data = 0;
+ u8 type, i, lanes, *cache = ppd->qsfp_info.cache;
+ u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
@@ -6031,12 +6231,46 @@ static void handle_8051_request(struct hfi1_devdata *dd)
case HREQ_READ_CONFIG:
case HREQ_SET_TX_EQ_ABS:
case HREQ_SET_TX_EQ_REL:
- case HREQ_ENABLE:
dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
- type);
+ type);
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
break;
+ case HREQ_ENABLE:
+ lanes = data & 0xF;
+ for (i = 0; lanes; lanes >>= 1, i++) {
+ if (!(lanes & 1))
+ continue;
+ if (data & 0x200) {
+ /* enable TX CDR */
+ if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+ cache[QSFP_CDR_INFO_OFFS] & 0x80)
+ cdr_ctrl_byte |= (1 << (i + 4));
+ } else {
+ /* disable TX CDR */
+ if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+ cache[QSFP_CDR_INFO_OFFS] & 0x80)
+ cdr_ctrl_byte &= ~(1 << (i + 4));
+ }
+
+ if (data & 0x800) {
+ /* enable RX CDR */
+ if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+ cache[QSFP_CDR_INFO_OFFS] & 0x40)
+ cdr_ctrl_byte |= (1 << i);
+ } else {
+ /* disable RX CDR */
+ if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+ cache[QSFP_CDR_INFO_OFFS] & 0x40)
+ cdr_ctrl_byte &= ~(1 << i);
+ }
+ }
+ one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+ &cdr_ctrl_byte, 1);
+ hreq_response(dd, HREQ_SUCCESS, data);
+ refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+ break;
+
case HREQ_CONFIG_DONE:
hreq_response(dd, HREQ_SUCCESS, 0);
break;
@@ -6056,11 +6290,11 @@ static void write_global_credit(struct hfi1_devdata *dd,
u8 vau, u16 total, u16 shared)
{
write_csr(dd, SEND_CM_GLOBAL_CREDIT,
- ((u64)total
- << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
- | ((u64)shared
- << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
- | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+ ((u64)total <<
+ SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
+ ((u64)shared <<
+ SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
+ ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
}
/*
@@ -6097,7 +6331,7 @@ void reset_link_credits(struct hfi1_devdata *dd)
/* remove all previous VL credit limits */
for (i = 0; i < TXE_NUM_DATA_VL; i++)
- write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+ write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
write_csr(dd, SEND_CM_CREDIT_VL15, 0);
write_global_credit(dd, 0, 0, 0);
/* reset the CM block */
@@ -6139,15 +6373,14 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
write_csr(dd, DC_LCB_CFG_RUN, 0);
/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
- 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
+ 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
reg = read_csr(dd, DCC_CFG_RESET);
- write_csr(dd, DCC_CFG_RESET,
- reg
- | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
- | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
- (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
+ write_csr(dd, DCC_CFG_RESET, reg |
+ (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
+ (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+ (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
if (!abort) {
udelay(1); /* must hold for the longer of 16cclks or 20ns */
write_csr(dd, DCC_CFG_RESET, reg);
@@ -6176,14 +6409,18 @@ static void dc_shutdown(struct hfi1_devdata *dd)
spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
- /* Going to OFFLINE would have causes the 8051 to put the
+ /*
+ * Going to OFFLINE would have causes the 8051 to put the
* SerDes into reset already. Just need to shut down the 8051,
- * itself. */
+ * itself.
+ */
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
-/* Calling this after the DC has been brought out of reset should not
- * do any damage. */
+/*
+ * Calling this after the DC has been brought out of reset should not
+ * do any damage.
+ */
static void dc_start(struct hfi1_devdata *dd)
{
unsigned long flags;
@@ -6199,7 +6436,7 @@ static void dc_start(struct hfi1_devdata *dd)
ret = wait_fm_ready(dd, TIMEOUT_8051_START);
if (ret) {
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
- __func__);
+ __func__);
}
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -6292,7 +6529,7 @@ static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
- DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
}
@@ -6309,8 +6546,10 @@ void handle_sma_message(struct work_struct *work)
u64 msg;
int ret;
- /* msg is bytes 1-4 of the 40-bit idle message - the command code
- is stripped off */
+ /*
+ * msg is bytes 1-4 of the 40-bit idle message - the command code
+ * is stripped off
+ */
ret = read_idle_sma(dd, &msg);
if (ret)
return;
@@ -6336,8 +6575,8 @@ void handle_sma_message(struct work_struct *work)
*
* Can activate the node. Discard otherwise.
*/
- if (ppd->host_link_state == HLS_UP_ARMED
- && ppd->is_active_optimize_enabled) {
+ if (ppd->host_link_state == HLS_UP_ARMED &&
+ ppd->is_active_optimize_enabled) {
ppd->neighbor_normal = 1;
ret = set_link_state(ppd, HLS_UP_ACTIVE);
if (ret)
@@ -6349,8 +6588,8 @@ void handle_sma_message(struct work_struct *work)
break;
default:
dd_dev_err(dd,
- "%s: received unexpected SMA idle message 0x%llx\n",
- __func__, msg);
+ "%s: received unexpected SMA idle message 0x%llx\n",
+ __func__, msg);
break;
}
}
@@ -6442,10 +6681,9 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
- "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
- freeze ? "" : "un",
- reg & ALL_FROZE,
- freeze ? ALL_FROZE : 0ull);
+ "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
+ freeze ? "" : "un", reg & ALL_FROZE,
+ freeze ? ALL_FROZE : 0ull);
return;
}
usleep_range(80, 120);
@@ -6475,11 +6713,17 @@ static void rxe_freeze(struct hfi1_devdata *dd)
*/
static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
{
+ u32 rcvmask;
int i;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++)
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
+ for (i = 0; i < dd->n_krcv_queues; i++) {
+ rcvmask = HFI1_RCVCTRL_CTXT_ENB;
+ /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
+ rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+ HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
+ hfi1_rcvctrl(dd, rcvmask, i);
+ }
/* enable port */
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -6564,7 +6808,7 @@ void handle_freeze(struct work_struct *work)
void handle_link_up(struct work_struct *work)
{
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
- link_up_work);
+ link_up_work);
set_link_state(ppd, HLS_UP_INIT);
/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
@@ -6583,17 +6827,20 @@ void handle_link_up(struct work_struct *work)
if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
/* oops - current speed is not enabled, bounce */
dd_dev_err(ppd->dd,
- "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
- ppd->link_speed_active, ppd->link_speed_enabled);
+ "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
+ ppd->link_speed_active, ppd->link_speed_enabled);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
- OPA_LINKDOWN_REASON_SPEED_POLICY);
+ OPA_LINKDOWN_REASON_SPEED_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
+ tune_serdes(ppd);
start_link(ppd);
}
}
-/* Several pieces of LNI information were cached for SMA in ppd.
- * Reset these on link down */
+/*
+ * Several pieces of LNI information were cached for SMA in ppd.
+ * Reset these on link down
+ */
static void reset_neighbor_info(struct hfi1_pportdata *ppd)
{
ppd->neighbor_guid = 0;
@@ -6613,7 +6860,13 @@ void handle_link_down(struct work_struct *work)
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
link_down_work);
- /* go offline first, then deal with reasons */
+ if ((ppd->host_link_state &
+ (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
+ ppd->port_type == PORT_TYPE_FIXED)
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
+
+ /* Go offline first, then deal with reading/writing through 8051 */
set_link_state(ppd, HLS_DN_OFFLINE);
lcl_reason = 0;
@@ -6633,12 +6886,16 @@ void handle_link_down(struct work_struct *work)
/* disable the port */
clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
- /* If there is no cable attached, turn the DC off. Otherwise,
- * start the link bring up. */
- if (!qsfp_mod_present(ppd))
+ /*
+ * If there is no cable attached, turn the DC off. Otherwise,
+ * start the link bring up.
+ */
+ if (!qsfp_mod_present(ppd)) {
dc_shutdown(ppd->dd);
- else
+ } else {
+ tune_serdes(ppd);
start_link(ppd);
+ }
}
void handle_link_bounce(struct work_struct *work)
@@ -6651,10 +6908,11 @@ void handle_link_bounce(struct work_struct *work)
*/
if (ppd->host_link_state & HLS_UP) {
set_link_state(ppd, HLS_DN_OFFLINE);
+ tune_serdes(ppd);
start_link(ppd);
} else {
dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
- __func__, link_state_name(ppd->host_link_state));
+ __func__, link_state_name(ppd->host_link_state));
}
}
@@ -6751,7 +7009,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
case 3: return OPA_LINK_WIDTH_3X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
- __func__, width);
+ __func__, width);
/* fall through */
case 4: return OPA_LINK_WIDTH_4X;
}
@@ -6763,6 +7021,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
static const u8 bit_counts[16] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
};
+
static inline u8 nibble_to_count(u8 nibble)
{
return bit_counts[nibble & 0xf];
@@ -6788,7 +7047,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
/* read the active lanes */
read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
- &rx_polarity_inversion, &max_rate);
+ &rx_polarity_inversion, &max_rate);
read_local_lni(dd, &enable_lane_rx);
/* convert to counts */
@@ -6800,8 +7059,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* handle_verify_cap(). The ASIC 8051 firmware does not correctly
* set the max_rate field in handle_verify_cap until v0.19.
*/
- if ((dd->icode == ICODE_RTL_SILICON)
- && (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ if ((dd->icode == ICODE_RTL_SILICON) &&
+ (dd->dc8051_ver < dc8051_ver(0, 19))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@@ -6809,8 +7068,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
break;
default:
dd_dev_err(dd,
- "%s: unexpected max rate %d, using 25Gb\n",
- __func__, (int)max_rate);
+ "%s: unexpected max rate %d, using 25Gb\n",
+ __func__, (int)max_rate);
/* fall through */
case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
@@ -6819,8 +7078,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
}
dd_dev_info(dd,
- "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
- enable_lane_tx, tx, enable_lane_rx, rx);
+ "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
+ enable_lane_tx, tx, enable_lane_rx, rx);
*tx_width = link_width_to_bits(dd, tx);
*rx_width = link_width_to_bits(dd, rx);
}
@@ -6923,13 +7182,8 @@ void handle_verify_cap(struct work_struct *work)
*/
read_vc_remote_phy(dd, &power_management, &continious);
- read_vc_remote_fabric(
- dd,
- &vau,
- &z,
- &vcu,
- &vl15buf,
- &partner_supported_crc);
+ read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
+ &partner_supported_crc);
read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
read_remote_device_id(dd, &device_id, &device_rev);
/*
@@ -6940,19 +7194,16 @@ void handle_verify_cap(struct work_struct *work)
/* print the active widths */
get_link_widths(dd, &active_tx, &active_rx);
dd_dev_info(dd,
- "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
- (int)power_management, (int)continious);
+ "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
+ (int)power_management, (int)continious);
dd_dev_info(dd,
- "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
- (int)vau,
- (int)z,
- (int)vcu,
- (int)vl15buf,
- (int)partner_supported_crc);
+ "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
+ (int)vau, (int)z, (int)vcu, (int)vl15buf,
+ (int)partner_supported_crc);
dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
- (u32)remote_tx_rate, (u32)link_widths);
+ (u32)remote_tx_rate, (u32)link_widths);
dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
- (u32)device_id, (u32)device_rev);
+ (u32)device_id, (u32)device_rev);
/*
* The peer vAU value just read is the peer receiver value. HFI does
* not support a transmit vAU of 0 (AU == 8). We advertised that
@@ -6987,10 +7238,10 @@ void handle_verify_cap(struct work_struct *work)
reg = read_csr(dd, SEND_CM_CTRL);
if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
write_csr(dd, SEND_CM_CTRL,
- reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+ reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
} else {
write_csr(dd, SEND_CM_CTRL,
- reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+ reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
}
ppd->link_speed_active = 0; /* invalid value */
@@ -7015,7 +7266,7 @@ void handle_verify_cap(struct work_struct *work)
}
if (ppd->link_speed_active == 0) {
dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
- __func__, (int)remote_tx_rate);
+ __func__, (int)remote_tx_rate);
ppd->link_speed_active = OPA_LINK_SPEED_25G;
}
@@ -7071,9 +7322,9 @@ void handle_verify_cap(struct work_struct *work)
read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
+ "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->mgmt_allowed, ppd->neighbor_fm_security);
if (ppd->mgmt_allowed)
add_full_mgmt_pkey(ppd);
@@ -7127,28 +7378,27 @@ retry:
/* bounce if not at starting active width */
if ((ppd->link_width_active !=
- ppd->link_width_downgrade_tx_active)
- || (ppd->link_width_active !=
- ppd->link_width_downgrade_rx_active)) {
+ ppd->link_width_downgrade_tx_active) ||
+ (ppd->link_width_active !=
+ ppd->link_width_downgrade_rx_active)) {
dd_dev_err(ppd->dd,
- "Link downgrade is disabled and link has downgraded, downing link\n");
+ "Link downgrade is disabled and link has downgraded, downing link\n");
dd_dev_err(ppd->dd,
- " original 0x%x, tx active 0x%x, rx active 0x%x\n",
- ppd->link_width_active,
- ppd->link_width_downgrade_tx_active,
- ppd->link_width_downgrade_rx_active);
+ " original 0x%x, tx active 0x%x, rx active 0x%x\n",
+ ppd->link_width_active,
+ ppd->link_width_downgrade_tx_active,
+ ppd->link_width_downgrade_rx_active);
do_bounce = 1;
}
- } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
- || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
+ } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 ||
+ (lwde & ppd->link_width_downgrade_rx_active) == 0) {
/* Tx or Rx is outside the enabled policy */
dd_dev_err(ppd->dd,
- "Link is outside of downgrade allowed, downing link\n");
+ "Link is outside of downgrade allowed, downing link\n");
dd_dev_err(ppd->dd,
- " enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
- lwde,
- ppd->link_width_downgrade_tx_active,
- ppd->link_width_downgrade_rx_active);
+ " enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
+ lwde, ppd->link_width_downgrade_tx_active,
+ ppd->link_width_downgrade_rx_active);
do_bounce = 1;
}
@@ -7157,8 +7407,9 @@ done:
if (do_bounce) {
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
- OPA_LINKDOWN_REASON_WIDTH_POLICY);
+ OPA_LINKDOWN_REASON_WIDTH_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
+ tune_serdes(ppd);
start_link(ppd);
}
}
@@ -7239,9 +7490,10 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
queue_link_down = 1;
dd_dev_info(dd, "Link error: %s\n",
- dc8051_info_err_string(buf,
- sizeof(buf),
- err & FAILED_LNI));
+ dc8051_info_err_string(buf,
+ sizeof(buf),
+ err &
+ FAILED_LNI));
}
err &= ~(u64)FAILED_LNI;
}
@@ -7253,7 +7505,8 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
if (err) {
/* report remaining errors, but do not do anything */
dd_dev_err(dd, "8051 info error: %s\n",
- dc8051_info_err_string(buf, sizeof(buf), err));
+ dc8051_info_err_string(buf, sizeof(buf),
+ err));
}
/*
@@ -7281,7 +7534,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)LINKUP_ACHIEVED;
}
if (host_msg & EXT_DEVICE_CFG_REQ) {
- handle_8051_request(dd);
+ queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);
host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
}
if (host_msg & VERIFY_CAP_FRAME) {
@@ -7306,8 +7559,9 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
if (host_msg) {
/* report remaining messages, but do not do anything */
dd_dev_info(dd, "8051 info host message: %s\n",
- dc8051_info_host_msg_string(buf, sizeof(buf),
- host_msg));
+ dc8051_info_host_msg_string(buf,
+ sizeof(buf),
+ host_msg));
}
reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
@@ -7320,25 +7574,27 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
*/
dd_dev_err(dd, "Lost 8051 heartbeat\n");
write_csr(dd, DC_DC8051_ERR_EN,
- read_csr(dd, DC_DC8051_ERR_EN)
- & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
+ read_csr(dd, DC_DC8051_ERR_EN) &
+ ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
}
if (reg) {
/* report the error, but do not do anything */
dd_dev_err(dd, "8051 error: %s\n",
- dc8051_err_string(buf, sizeof(buf), reg));
+ dc8051_err_string(buf, sizeof(buf), reg));
}
if (queue_link_down) {
- /* if the link is already going down or disabled, do not
- * queue another */
- if ((ppd->host_link_state
- & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
- || ppd->link_enabled == 0) {
+ /*
+ * if the link is already going down or disabled, do not
+ * queue another
+ */
+ if ((ppd->host_link_state &
+ (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
+ ppd->link_enabled == 0) {
dd_dev_info(dd, "%s: not queuing link down\n",
- __func__);
+ __func__);
} else {
queue_work(ppd->hfi1_wq, &ppd->link_down_work);
}
@@ -7480,8 +7736,10 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
/* set status bit */
dd->err_info_rcvport.status_and_code |=
OPA_EI_STATUS_SMASK;
- /* save first 2 flits in the packet that caused
- * the error */
+ /*
+ * save first 2 flits in the packet that caused
+ * the error
+ */
dd->err_info_rcvport.packet_flit1 = hdr0;
dd->err_info_rcvport.packet_flit2 = hdr1;
}
@@ -7514,7 +7772,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
/* just report this */
dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n",
- hdr0, hdr1);
+ hdr0, hdr1);
reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
}
@@ -7533,7 +7791,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
/* report any remaining errors */
if (reg)
dd_dev_info(dd, "DCC Error: %s\n",
- dcc_err_string(buf, sizeof(buf), reg));
+ dcc_err_string(buf, sizeof(buf), reg));
if (lcl_reason == 0)
lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
@@ -7550,7 +7808,7 @@ static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
char buf[96];
dd_dev_info(dd, "LCB Error: %s\n",
- lcb_err_string(buf, sizeof(buf), reg));
+ lcb_err_string(buf, sizeof(buf), reg));
}
/*
@@ -7640,7 +7898,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
err_detail = "out of range";
}
dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
- err_detail, source);
+ err_detail, source);
}
/*
@@ -7666,7 +7924,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
err_detail = "out of range";
}
dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
- err_detail, source);
+ err_detail, source);
}
/*
@@ -7677,12 +7935,14 @@ static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
char name[64];
dd_dev_err(dd, "unexpected %s interrupt\n",
- is_reserved_name(name, sizeof(name), source));
+ is_reserved_name(name, sizeof(name), source));
}
static const struct is_table is_table[] = {
-/* start end
- name func interrupt func */
+/*
+ * start end
+ * name func interrupt func
+ */
{ IS_GENERAL_ERR_START, IS_GENERAL_ERR_END,
is_misc_err_name, is_misc_err_int },
{ IS_SDMAENG_ERR_START, IS_SDMAENG_ERR_END,
@@ -7753,7 +8013,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
/* phase 2: call the appropriate handler */
for_each_set_bit(bit, (unsigned long *)&regs[0],
- CCE_NUM_INT_CSRS*64) {
+ CCE_NUM_INT_CSRS * 64) {
is_interrupt(dd, bit);
}
@@ -7776,27 +8036,27 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* This read_csr is really bad in the hot path */
status = read_csr(dd,
- CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
- & sde->imask;
+ CCE_INT_STATUS + (8 * (IS_SDMA_START / 64)))
+ & sde->imask;
if (likely(status)) {
/* clear the interrupt(s) */
write_csr(dd,
- CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
- status);
+ CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)),
+ status);
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
} else
dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
- sde->this_idx);
+ sde->this_idx);
return IRQ_HANDLED;
}
/*
- * Clear the receive interrupt, forcing the write and making sure
- * we have data from the chip, pushing everything in front of it
- * back to the host.
+ * Clear the receive interrupt. Use a read of the interrupt clear CSR
+ * to insure that the write completed. This does NOT guarantee that
+ * queued DMA writes to memory from the chip are pushed.
*/
static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
{
@@ -7810,27 +8070,45 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
}
/* force the receive interrupt */
-static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
+void force_recv_intr(struct hfi1_ctxtdata *rcd)
{
write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
}
-/* return non-zero if a packet is present */
+/*
+ * Return non-zero if a packet is present.
+ *
+ * This routine is called when rechecking for packets after the RcvAvail
+ * interrupt has been cleared down. First, do a quick check of memory for
+ * a packet present. If not found, use an expensive CSR read of the context
+ * tail to determine the actual tail. The CSR read is necessary because there
+ * is no method to push pending DMAs to memory other than an interrupt and we
+ * are trying to determine if we need to force an interrupt.
+ */
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
+ u32 tail;
+ int present;
+
if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
- return (rcd->seq_cnt ==
+ present = (rcd->seq_cnt ==
rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+ else /* is RDMA rtail */
+ present = (rcd->head != get_rcvhdrtail(rcd));
+
+ if (present)
+ return 1;
- /* else is RDMA rtail */
- return (rcd->head != get_rcvhdrtail(rcd));
+ /* fall back to a CSR read, correct indpendent of DMA_RTAIL */
+ tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
+ return rcd->head != tail;
}
/*
* Receive packet IRQ handler. This routine expects to be on its own IRQ.
* This routine will try to handle packets immediately (latency), but if
* it finds too many, it will invoke the thread handler (bandwitdh). The
- * chip receive interupt is *not* cleared down until this or the thread (if
+ * chip receive interrupt is *not* cleared down until this or the thread (if
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
@@ -7843,6 +8121,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
trace_hfi1_receive_interrupt(dd, rcd->ctxt);
this_cpu_inc(*dd->int_counter);
+ aspm_ctx_disable(rcd);
/* receive interrupt remains blocked while processing packets */
disposition = rcd->do_interrupt(rcd, 0);
@@ -7909,7 +8188,7 @@ u32 read_physical_state(struct hfi1_devdata *dd)
& DC_DC8051_STS_CUR_STATE_PORT_MASK;
}
-static u32 read_logical_state(struct hfi1_devdata *dd)
+u32 read_logical_state(struct hfi1_devdata *dd)
{
u64 reg;
@@ -8157,8 +8436,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
}
-static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
- u8 lane_id, u32 config_data)
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+ u8 lane_id, u32 config_data)
{
u64 data;
int ret;
@@ -8169,8 +8448,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
- "load 8051 config: field id %d, lane %d, err %d\n",
- (int)field_id, (int)lane_id, ret);
+ "load 8051 config: field id %d, lane %d, err %d\n",
+ (int)field_id, (int)lane_id, ret);
}
return ret;
}
@@ -8180,8 +8459,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
* set the result, even on error.
* Return 0 on success, -errno on failure
*/
-static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
- u32 *result)
+int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
+ u32 *result)
{
u64 big_data;
u32 addr;
@@ -8207,7 +8486,7 @@ static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
} else {
*result = 0;
dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
- __func__, lane_id, field_id);
+ __func__, lane_id, field_id);
}
return ret;
@@ -8244,7 +8523,7 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
u32 frame;
read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
- &frame);
+ &frame);
*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8326,7 +8605,7 @@ static void read_vc_remote_link_width(struct hfi1_devdata *dd,
u32 frame;
read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
- &frame);
+ &frame);
*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
& REMOTE_TX_RATE_MASK;
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8366,7 +8645,7 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
*link_quality = 0;
if (dd->pport->host_link_state & HLS_UP) {
ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
- &frame);
+ &frame);
if (ret == 0)
*link_quality = (frame >> LINK_QUALITY_SHIFT)
& LINK_QUALITY_MASK;
@@ -8426,10 +8705,9 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
for (lane = 0; lane < 4; lane++) {
ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
if (ret) {
- dd_dev_err(
- dd,
- "Unable to read lane %d firmware details\n",
- lane);
+ dd_dev_err(dd,
+ "Unable to read lane %d firmware details\n",
+ lane);
continue;
}
version = (frame >> SPICO_ROM_VERSION_SHIFT)
@@ -8437,8 +8715,8 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
& SPICO_ROM_PROD_ID_MASK;
dd_dev_info(dd,
- "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
- lane, version, prod_id);
+ "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
+ lane, version, prod_id);
}
}
@@ -8451,11 +8729,10 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
{
int ret;
- ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
- type, data_out);
+ ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd, "read idle message: type %d, err %d\n",
- (u32)type, ret);
+ (u32)type, ret);
return -EINVAL;
}
dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
@@ -8472,8 +8749,8 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
*/
static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
{
- return read_idle_message(dd,
- (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
+ return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT,
+ data);
}
/*
@@ -8489,7 +8766,7 @@ static int send_idle_message(struct hfi1_devdata *dd, u64 data)
ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
- data, ret);
+ data, ret);
return -EINVAL;
}
return 0;
@@ -8504,8 +8781,8 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
{
u64 data;
- data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
- | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
+ data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) |
+ ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
return send_idle_message(dd, data);
}
@@ -8527,7 +8804,7 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
/* LCB_CFG_LOOPBACK.VAL = 2 */
/* LCB_CFG_LANE_WIDTH.VAL = 0 */
write_csr(dd, DC_LCB_CFG_LOOPBACK,
- IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
+ IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
}
@@ -8539,25 +8816,24 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
/* LCB_CFG_RUN.EN = 1 */
write_csr(dd, DC_LCB_CFG_RUN,
- 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+ 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
timeout = jiffies + msecs_to_jiffies(10);
while (1) {
- reg = read_csr(dd,
- DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
if (reg)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
return -ETIMEDOUT;
}
udelay(2);
}
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
- 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
+ 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
}
if (!loopback) {
@@ -8569,10 +8845,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
* done with LCB set up before resuming.
*/
dd_dev_err(dd,
- "Pausing for peer to be finished with LCB set up\n");
+ "Pausing for peer to be finished with LCB set up\n");
msleep(5000);
- dd_dev_err(dd,
- "Continuing with quick linkup\n");
+ dd_dev_err(dd, "Continuing with quick linkup\n");
}
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
@@ -8586,8 +8861,8 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
- "%s: set physical link state to quick LinkUp failed with return %d\n",
- __func__, ret);
+ "%s: set physical link state to quick LinkUp failed with return %d\n",
+ __func__, ret);
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
@@ -8612,8 +8887,8 @@ static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
if (ret == HCMD_SUCCESS)
return 0;
dd_dev_err(dd,
- "Set physical link state to SerDes Loopback failed with return %d\n",
- ret);
+ "Set physical link state to SerDes Loopback failed with return %d\n",
+ ret);
if (ret >= 0)
ret = -EINVAL;
return ret;
@@ -8628,7 +8903,7 @@ static int init_loopback(struct hfi1_devdata *dd)
/* all loopbacks should disable self GUID check */
write_csr(dd, DC_DC8051_CFG_MODE,
- (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
+ (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
/*
* The simulator has only one loopback option - LCB. Switch
@@ -8636,10 +8911,9 @@ static int init_loopback(struct hfi1_devdata *dd)
*
* Accept all valid loopback values.
*/
- if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
- && (loopback == LOOPBACK_SERDES
- || loopback == LOOPBACK_LCB
- || loopback == LOOPBACK_CABLE)) {
+ if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) &&
+ (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
+ loopback == LOOPBACK_CABLE)) {
loopback = LOOPBACK_LCB;
quick_linkup = 1;
return 0;
@@ -8660,7 +8934,7 @@ static int init_loopback(struct hfi1_devdata *dd)
/* not supported in emulation due to emulation RTL changes */
if (dd->icode == ICODE_FPGA_EMULATION) {
dd_dev_err(dd,
- "LCB loopback not supported in emulation\n");
+ "LCB loopback not supported in emulation\n");
return -EINVAL;
}
return 0;
@@ -8687,10 +8961,10 @@ static u16 opa_to_vc_link_widths(u16 opa_widths)
u16 from;
u16 to;
} opa_link_xlate[] = {
- { OPA_LINK_WIDTH_1X, 1 << (1-1) },
- { OPA_LINK_WIDTH_2X, 1 << (2-1) },
- { OPA_LINK_WIDTH_3X, 1 << (3-1) },
- { OPA_LINK_WIDTH_4X, 1 << (4-1) },
+ { OPA_LINK_WIDTH_1X, 1 << (1 - 1) },
+ { OPA_LINK_WIDTH_2X, 1 << (2 - 1) },
+ { OPA_LINK_WIDTH_3X, 1 << (3 - 1) },
+ { OPA_LINK_WIDTH_4X, 1 << (4 - 1) },
};
for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
@@ -8716,7 +8990,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
/* set the local tx rate - need to read-modify-write */
ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
- &rx_polarity_inversion, &ppd->local_tx_rate);
+ &rx_polarity_inversion, &ppd->local_tx_rate);
if (ret)
goto set_local_link_attributes_fail;
@@ -8737,15 +9011,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
enable_lane_tx = 0xF; /* enable all four lanes */
ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
- rx_polarity_inversion, ppd->local_tx_rate);
+ rx_polarity_inversion, ppd->local_tx_rate);
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
/*
* DC supports continuous updates.
*/
- ret = write_vc_local_phy(dd, 0 /* no power management */,
- 1 /* continuous updates */);
+ ret = write_vc_local_phy(dd,
+ 0 /* no power management */,
+ 1 /* continuous updates */);
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
@@ -8756,7 +9031,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
goto set_local_link_attributes_fail;
ret = write_vc_local_link_width(dd, 0, 0,
- opa_to_vc_link_widths(ppd->link_width_enabled));
+ opa_to_vc_link_widths(
+ ppd->link_width_enabled));
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
@@ -8767,8 +9043,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
set_local_link_attributes_fail:
dd_dev_err(dd,
- "Failed to set local link attributes, return 0x%x\n",
- ret);
+ "Failed to set local link attributes, return 0x%x\n",
+ ret);
return ret;
}
@@ -8781,54 +9057,101 @@ int start_link(struct hfi1_pportdata *ppd)
{
if (!ppd->link_enabled) {
dd_dev_info(ppd->dd,
- "%s: stopping link start because link is disabled\n",
- __func__);
+ "%s: stopping link start because link is disabled\n",
+ __func__);
return 0;
}
if (!ppd->driver_link_ready) {
dd_dev_info(ppd->dd,
- "%s: stopping link start because driver is not ready\n",
- __func__);
+ "%s: stopping link start because driver is not ready\n",
+ __func__);
return 0;
}
if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
- loopback == LOOPBACK_LCB ||
- ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+ loopback == LOOPBACK_LCB ||
+ ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
return set_link_state(ppd, HLS_DN_POLL);
dd_dev_info(ppd->dd,
- "%s: stopping link start because no cable is present\n",
- __func__);
+ "%s: stopping link start because no cable is present\n",
+ __func__);
return -EAGAIN;
}
-static void reset_qsfp(struct hfi1_pportdata *ppd)
+static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ u64 mask;
+ unsigned long timeout;
+
+ /*
+ * Check for QSFP interrupt for t_init (SFF 8679)
+ */
+ timeout = jiffies + msecs_to_jiffies(2000);
+ while (1) {
+ mask = read_csr(dd, dd->hfi1_id ?
+ ASIC_QSFP2_IN : ASIC_QSFP1_IN);
+ if (!(mask & QSFP_HFI0_INT_N)) {
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
+ ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+ break;
+ }
+ if (time_after(jiffies, timeout)) {
+ dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
+ __func__);
+ break;
+ }
+ udelay(2);
+ }
+}
+
+static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ u64 mask;
+
+ mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
+ if (enable)
+ mask |= (u64)QSFP_HFI0_INT_N;
+ else
+ mask &= ~(u64)QSFP_HFI0_INT_N;
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
+}
+
+void reset_qsfp(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
u64 mask, qsfp_mask;
+ /* Disable INT_N from triggering QSFP interrupts */
+ set_qsfp_int_n(ppd, 0);
+
+ /* Reset the QSFP */
mask = (u64)QSFP_HFI0_RESET_N;
- qsfp_mask = read_csr(dd,
- dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
+ qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
qsfp_mask |= mask;
- write_csr(dd,
- dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
- qsfp_mask);
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
qsfp_mask = read_csr(dd,
- dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
+ dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
qsfp_mask &= ~mask;
write_csr(dd,
- dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
- qsfp_mask);
+ dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
udelay(10);
qsfp_mask |= mask;
write_csr(dd,
- dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
- qsfp_mask);
+ dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
+
+ wait_for_qsfp_init(ppd);
+
+ /*
+ * Allow INT_N to trigger the QSFP interrupt to watch
+ * for alarms and warnings
+ */
+ set_qsfp_int_n(ppd, 1);
}
static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -8837,102 +9160,86 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd = ppd->dd;
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
- (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd,
- "%s: QSFP cable on fire\n",
- __func__);
+ (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
+ dd_dev_info(dd, "%s: QSFP cable on fire\n",
+ __func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
- (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
- dd_dev_info(dd,
- "%s: QSFP cable temperature too low\n",
- __func__);
+ (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
+ dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
+ __func__);
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
- (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
- dd_dev_info(dd,
- "%s: QSFP supply voltage too high\n",
- __func__);
+ (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
+ dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
+ __func__);
if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
- (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
- dd_dev_info(dd,
- "%s: QSFP supply voltage too low\n",
- __func__);
+ (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
+ dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
+ __func__);
/* Byte 2 is vendor specific */
if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
- (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable RX channel 1/2 power too high\n",
- __func__);
+ (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
- (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable RX channel 1/2 power too low\n",
- __func__);
+ (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
- (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable RX channel 3/4 power too high\n",
- __func__);
+ (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
- (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable RX channel 3/4 power too low\n",
- __func__);
+ (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
- (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 1/2 bias too high\n",
- __func__);
+ (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
- (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 1/2 bias too low\n",
- __func__);
+ (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
- (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 3/4 bias too high\n",
- __func__);
+ (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
- (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 3/4 bias too low\n",
- __func__);
+ (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
- (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 1/2 power too high\n",
- __func__);
+ (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
- (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 1/2 power too low\n",
- __func__);
+ (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
- (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 3/4 power too high\n",
- __func__);
+ (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
- (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd,
- "%s: Cable TX channel 3/4 power too low\n",
- __func__);
+ (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
+ dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
+ __func__);
/* Bytes 9-10 and 11-12 are reserved */
/* Bytes 13-15 are vendor specific */
@@ -8940,35 +9247,8 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
return 0;
}
-static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
-{
- refresh_qsfp_cache(ppd, &ppd->qsfp_info);
-
- return 0;
-}
-
-static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
-{
- struct hfi1_devdata *dd = ppd->dd;
- u8 qsfp_interrupt_status = 0;
-
- if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
- != 1) {
- dd_dev_info(dd,
- "%s: Failed to read status of QSFP module\n",
- __func__);
- return -EIO;
- }
-
- /* We don't care about alarms & warnings with a non-functional INT_N */
- if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
- do_pre_lni_host_behaviors(ppd);
-
- return 0;
-}
-
/* This routine will only be scheduled if the QSFP module is present */
-static void qsfp_event(struct work_struct *work)
+void qsfp_event(struct work_struct *work)
{
struct qsfp_data *qd;
struct hfi1_pportdata *ppd;
@@ -8990,76 +9270,75 @@ static void qsfp_event(struct work_struct *work)
dc_start(dd);
if (qd->cache_refresh_required) {
- msleep(3000);
- reset_qsfp(ppd);
+ set_qsfp_int_n(ppd, 0);
- /* Check for QSFP interrupt after t_init (SFF 8679)
- * + extra
+ wait_for_qsfp_init(ppd);
+
+ /*
+ * Allow INT_N to trigger the QSFP interrupt to watch
+ * for alarms and warnings
*/
- msleep(3000);
- if (!qd->qsfp_interrupt_functional) {
- if (do_qsfp_intr_fallback(ppd) < 0)
- dd_dev_info(dd, "%s: QSFP fallback failed\n",
- __func__);
- ppd->driver_link_ready = 1;
- start_link(ppd);
- }
+ set_qsfp_int_n(ppd, 1);
+
+ tune_serdes(ppd);
+
+ start_link(ppd);
}
if (qd->check_interrupt_flags) {
u8 qsfp_interrupt_status[16] = {0,};
- if (qsfp_read(ppd, dd->hfi1_id, 6,
- &qsfp_interrupt_status[0], 16) != 16) {
+ if (one_qsfp_read(ppd, dd->hfi1_id, 6,
+ &qsfp_interrupt_status[0], 16) != 16) {
dd_dev_info(dd,
- "%s: Failed to read status of QSFP module\n",
- __func__);
+ "%s: Failed to read status of QSFP module\n",
+ __func__);
} else {
unsigned long flags;
- u8 data_status;
+ handle_qsfp_error_conditions(
+ ppd, qsfp_interrupt_status);
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.check_interrupt_flags = 0;
spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
- flags);
-
- if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
- != 1) {
- dd_dev_info(dd,
- "%s: Failed to read status of QSFP module\n",
- __func__);
- }
- if (!(data_status & QSFP_DATA_NOT_READY)) {
- do_pre_lni_host_behaviors(ppd);
- start_link(ppd);
- } else
- handle_qsfp_error_conditions(ppd,
- qsfp_interrupt_status);
+ flags);
}
}
}
-void init_qsfp(struct hfi1_pportdata *ppd)
+static void init_qsfp_int(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd = ppd->dd;
- u64 qsfp_mask;
+ struct hfi1_pportdata *ppd = dd->pport;
+ u64 qsfp_mask, cce_int_mask;
+ const int qsfp1_int_smask = QSFP1_INT % 64;
+ const int qsfp2_int_smask = QSFP2_INT % 64;
- if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
- ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
- ppd->driver_link_ready = 1;
- return;
+ /*
+ * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
+ * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
+ * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
+ * the index of the appropriate CSR in the CCEIntMask CSR array
+ */
+ cce_int_mask = read_csr(dd, CCE_INT_MASK +
+ (8 * (QSFP1_INT / 64)));
+ if (dd->hfi1_id) {
+ cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
+ write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
+ cce_int_mask);
+ } else {
+ cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
+ write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
+ cce_int_mask);
}
- ppd->qsfp_info.ppd = ppd;
- INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
-
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
- write_csr(dd,
- dd->hfi1_id ?
- ASIC_QSFP2_CLEAR :
- ASIC_QSFP1_CLEAR,
- qsfp_mask);
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+ qsfp_mask);
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
+ qsfp_mask);
+
+ set_qsfp_int_n(ppd, 0);
/* Handle active low nature of INT_N and MODPRST_N pins */
if (qsfp_mod_present(ppd))
@@ -9067,29 +9346,6 @@ void init_qsfp(struct hfi1_pportdata *ppd)
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
-
- /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
- qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
- write_csr(dd,
- dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
- qsfp_mask);
-
- if (qsfp_mod_present(ppd)) {
- msleep(3000);
- reset_qsfp(ppd);
-
- /* Check for QSFP interrupt after t_init (SFF 8679)
- * + extra
- */
- msleep(3000);
- if (!ppd->qsfp_info.qsfp_interrupt_functional) {
- if (do_qsfp_intr_fallback(ppd) < 0)
- dd_dev_info(dd,
- "%s: QSFP fallback failed\n",
- __func__);
- ppd->driver_link_ready = 1;
- }
- }
}
/*
@@ -9097,6 +9353,10 @@ void init_qsfp(struct hfi1_pportdata *ppd)
*/
static void init_lcb(struct hfi1_devdata *dd)
{
+ /* simulator does not correctly handle LCB cclk loopback, skip */
+ if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+ return;
+
/* the DC has been reset earlier in the driver load */
/* set LCB for cclk loopback on the port */
@@ -9125,8 +9385,6 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
ppd->guid = guid;
}
- /* the link defaults to enabled */
- ppd->link_enabled = 1;
/* Set linkinit_reason on power up per OPA spec */
ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
@@ -9139,6 +9397,12 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
return ret;
}
+ /* tune the SERDES to a ballpark setting for
+ * optimal signal and bit error rate
+ * Needs to be done before starting the link
+ */
+ tune_serdes(ppd);
+
return start_link(ppd);
}
@@ -9156,8 +9420,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
- OPA_LINKDOWN_REASON_SMA_DISABLED);
+ OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_state(ppd, HLS_DN_OFFLINE);
/* disable the port */
@@ -9171,14 +9437,14 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
- ppd->ibport_data.rc_acks = NULL;
- ppd->ibport_data.rc_qacks = NULL;
- ppd->ibport_data.rc_acks = alloc_percpu(u64);
- ppd->ibport_data.rc_qacks = alloc_percpu(u64);
- ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
- if ((ppd->ibport_data.rc_acks == NULL) ||
- (ppd->ibport_data.rc_delayed_comp == NULL) ||
- (ppd->ibport_data.rc_qacks == NULL))
+ ppd->ibport_data.rvp.rc_acks = NULL;
+ ppd->ibport_data.rvp.rc_qacks = NULL;
+ ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+ ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+ ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+ if (!ppd->ibport_data.rvp.rc_acks ||
+ !ppd->ibport_data.rvp.rc_delayed_comp ||
+ !ppd->ibport_data.rvp.rc_qacks)
return -ENOMEM;
}
@@ -9213,8 +9479,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
pa = 0;
} else if (type > PT_INVALID) {
dd_dev_err(dd,
- "unexpected receive array type %u for index %u, not handled\n",
- type, index);
+ "unexpected receive array type %u for index %u, not handled\n",
+ type, index);
goto done;
}
@@ -9429,12 +9695,15 @@ static void set_send_length(struct hfi1_pportdata *ppd)
/* all kernel receive contexts have the same hdrqentsize */
for (i = 0; i < ppd->vls_supported; i++) {
sc_set_cr_threshold(dd->vld[i].sc,
- sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ sc_mtu_to_threshold(dd->vld[i].sc,
+ dd->vld[i].mtu,
+ dd->rcd[0]->
+ rcvhdrqentsize));
}
sc_set_cr_threshold(dd->vld[15].sc,
- sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ sc_mtu_to_threshold(dd->vld[15].sc,
+ dd->vld[15].mtu,
+ dd->rcd[0]->rcvhdrqentsize));
/* Adjust maximum MTU for the port in DC */
dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@ -9460,7 +9729,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
- << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
+ << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
@@ -9495,8 +9764,8 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
- "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
- state, curr_state);
+ "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+ state, curr_state);
return -ETIMEDOUT;
}
usleep_range(1950, 2050); /* sleep 2ms-ish */
@@ -9539,17 +9808,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
if (do_transition) {
ret = set_physical_link_state(dd,
- PLS_OFFLINE | (rem_reason << 8));
+ (rem_reason << 8) | PLS_OFFLINE);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
- "Failed to transition to Offline link state, return %d\n",
- ret);
+ "Failed to transition to Offline link state, return %d\n",
+ ret);
return -EINVAL;
}
- if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
ppd->offline_disabled_reason =
- OPA_LINKDOWN_REASON_TRANSIENT;
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
}
if (do_wait) {
@@ -9570,6 +9840,22 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+ if (ppd->port_type == PORT_TYPE_QSFP &&
+ ppd->qsfp_info.limiting_active &&
+ qsfp_mod_present(ppd)) {
+ int ret;
+
+ ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT);
+ if (ret == 0) {
+ set_qsfp_tx(ppd, 0);
+ release_chip_resource(dd, qsfp_resource(dd));
+ } else {
+ /* not fatal, but should warn */
+ dd_dev_err(dd,
+ "Unable to acquire lock to turn off QSFP TX\n");
+ }
+ }
+
/*
* The LNI has a mandatory wait time after the physical state
* moves to Offline.Quiet. The wait time may be different
@@ -9582,7 +9868,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
ret = wait_fm_ready(dd, 7000);
if (ret) {
dd_dev_err(dd,
- "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
+ "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
/* state is really offline, so make it so */
ppd->host_link_state = HLS_DN_OFFLINE;
return ret;
@@ -9605,8 +9891,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
read_last_local_state(dd, &last_local_state);
read_last_remote_state(dd, &last_remote_state);
dd_dev_err(dd,
- "LNI failure last states: local 0x%08x, remote 0x%08x\n",
- last_local_state, last_remote_state);
+ "LNI failure last states: local 0x%08x, remote 0x%08x\n",
+ last_local_state, last_remote_state);
}
/* the active link width (downgrade) is 0 on link down */
@@ -9754,14 +10040,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
state = dd->link_default;
/* interpret poll -> poll as a link bounce */
- poll_bounce = ppd->host_link_state == HLS_DN_POLL
- && state == HLS_DN_POLL;
+ poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
+ state == HLS_DN_POLL;
dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
- link_state_name(ppd->host_link_state),
- link_state_name(orig_new_state),
- poll_bounce ? "(bounce) " : "",
- link_state_reason_name(ppd, state));
+ link_state_name(ppd->host_link_state),
+ link_state_name(orig_new_state),
+ poll_bounce ? "(bounce) " : "",
+ link_state_reason_name(ppd, state));
was_up = !!(ppd->host_link_state & HLS_UP);
@@ -9782,8 +10068,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
switch (state) {
case HLS_UP_INIT:
- if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
- || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
+ if (ppd->host_link_state == HLS_DN_POLL &&
+ (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
/*
* Quick link up jumps from polling to here.
*
@@ -9791,7 +10077,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
* simulator jumps from polling to link up.
* Accept that here.
*/
- /* OK */;
+ /* OK */
} else if (ppd->host_link_state != HLS_GOING_UP) {
goto unexpected;
}
@@ -9802,8 +10088,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
/* logical state didn't change, stay at going_up */
ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
- "%s: logical state did not change to INIT\n",
- __func__);
+ "%s: logical state did not change to INIT\n",
+ __func__);
} else {
/* clear old transient LINKINIT_REASON code */
if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
@@ -9827,8 +10113,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
/* logical state didn't change, stay at init */
ppd->host_link_state = HLS_UP_INIT;
dd_dev_err(dd,
- "%s: logical state did not change to ARMED\n",
- __func__);
+ "%s: logical state did not change to ARMED\n",
+ __func__);
}
/*
* The simulator does not currently implement SMA messages,
@@ -9849,15 +10135,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
/* logical state didn't change, stay at armed */
ppd->host_link_state = HLS_UP_ARMED;
dd_dev_err(dd,
- "%s: logical state did not change to ACTIVE\n",
- __func__);
+ "%s: logical state did not change to ACTIVE\n",
+ __func__);
} else {
-
/* tell all engines to go running */
sdma_all_running(dd);
/* Signal the IB layer that the port has went active */
- event.device = &dd->verbs_dev.ibdev;
+ event.device = &dd->verbs_dev.rdi.ibdev;
event.element.port_num = ppd->port;
event.event = IB_EVENT_PORT_ACTIVE;
}
@@ -9884,6 +10169,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ppd->link_enabled = 1;
}
+ set_all_slowpath(ppd->dd);
ret = set_local_link_attributes(ppd);
if (ret)
break;
@@ -9898,12 +10184,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret1 = set_physical_link_state(dd, PLS_POLLING);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
- "Failed to transition to Polling link state, return 0x%x\n",
- ret1);
+ "Failed to transition to Polling link state, return 0x%x\n",
+ ret1);
ret = -EINVAL;
}
}
- ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
* If an error occurred above, go back to offline. The
* caller may reschedule another attempt.
@@ -9928,8 +10215,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret1 = set_physical_link_state(dd, PLS_DISABLED);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
- "Failed to transition to Disabled link state, return 0x%x\n",
- ret1);
+ "Failed to transition to Disabled link state, return 0x%x\n",
+ ret1);
ret = -EINVAL;
break;
}
@@ -9957,8 +10244,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret1 = set_physical_link_state(dd, PLS_LINKUP);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
- "Failed to transition to link up state, return 0x%x\n",
- ret1);
+ "Failed to transition to link up state, return 0x%x\n",
+ ret1);
ret = -EINVAL;
break;
}
@@ -9969,7 +10256,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
case HLS_LINK_COOLDOWN: /* transient within goto_offline() */
default:
dd_dev_info(dd, "%s: state 0x%x: not supported\n",
- __func__, state);
+ __func__, state);
ret = -EINVAL;
break;
}
@@ -9989,8 +10276,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
unexpected:
dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
- __func__, link_state_name(ppd->host_link_state),
- link_state_name(state));
+ __func__, link_state_name(ppd->host_link_state),
+ link_state_name(state));
ret = -EINVAL;
done:
@@ -10016,7 +10303,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
* The VL Arbitrator high limit is sent in units of 4k
* bytes, while HFI stores it in units of 64 bytes.
*/
- val *= 4096/64;
+ val *= 4096 / 64;
reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
@@ -10031,12 +10318,6 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
ppd->vls_operational = val;
if (!ppd->port)
ret = -EINVAL;
- else
- ret = sdma_map_init(
- ppd->dd,
- ppd->port - 1,
- val,
- NULL);
}
break;
/*
@@ -10084,8 +10365,8 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
default:
if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
dd_dev_info(ppd->dd,
- "%s: which %s, val 0x%x: not implemented\n",
- __func__, ib_cfg_name(which), val);
+ "%s: which %s, val 0x%x: not implemented\n",
+ __func__, ib_cfg_name(which), val);
break;
}
return ret;
@@ -10152,6 +10433,7 @@ static int vl_arb_match_cache(struct vl_arb_cache *cache,
{
return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
}
+
/* end functions related to vl arbitration table caching */
static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
@@ -10239,7 +10521,7 @@ static int get_buffer_control(struct hfi1_devdata *dd,
/* OPA and HFI have a 1-1 mapping */
for (i = 0; i < TXE_NUM_DATA_VL; i++)
- read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
+ read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8 * i), &bc->vl[i]);
/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
@@ -10293,41 +10575,41 @@ static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
{
write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
- DC_SC_VL_VAL(15_0,
- 0, dp->vlnt[0] & 0xf,
- 1, dp->vlnt[1] & 0xf,
- 2, dp->vlnt[2] & 0xf,
- 3, dp->vlnt[3] & 0xf,
- 4, dp->vlnt[4] & 0xf,
- 5, dp->vlnt[5] & 0xf,
- 6, dp->vlnt[6] & 0xf,
- 7, dp->vlnt[7] & 0xf,
- 8, dp->vlnt[8] & 0xf,
- 9, dp->vlnt[9] & 0xf,
- 10, dp->vlnt[10] & 0xf,
- 11, dp->vlnt[11] & 0xf,
- 12, dp->vlnt[12] & 0xf,
- 13, dp->vlnt[13] & 0xf,
- 14, dp->vlnt[14] & 0xf,
- 15, dp->vlnt[15] & 0xf));
+ DC_SC_VL_VAL(15_0,
+ 0, dp->vlnt[0] & 0xf,
+ 1, dp->vlnt[1] & 0xf,
+ 2, dp->vlnt[2] & 0xf,
+ 3, dp->vlnt[3] & 0xf,
+ 4, dp->vlnt[4] & 0xf,
+ 5, dp->vlnt[5] & 0xf,
+ 6, dp->vlnt[6] & 0xf,
+ 7, dp->vlnt[7] & 0xf,
+ 8, dp->vlnt[8] & 0xf,
+ 9, dp->vlnt[9] & 0xf,
+ 10, dp->vlnt[10] & 0xf,
+ 11, dp->vlnt[11] & 0xf,
+ 12, dp->vlnt[12] & 0xf,
+ 13, dp->vlnt[13] & 0xf,
+ 14, dp->vlnt[14] & 0xf,
+ 15, dp->vlnt[15] & 0xf));
write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
- DC_SC_VL_VAL(31_16,
- 16, dp->vlnt[16] & 0xf,
- 17, dp->vlnt[17] & 0xf,
- 18, dp->vlnt[18] & 0xf,
- 19, dp->vlnt[19] & 0xf,
- 20, dp->vlnt[20] & 0xf,
- 21, dp->vlnt[21] & 0xf,
- 22, dp->vlnt[22] & 0xf,
- 23, dp->vlnt[23] & 0xf,
- 24, dp->vlnt[24] & 0xf,
- 25, dp->vlnt[25] & 0xf,
- 26, dp->vlnt[26] & 0xf,
- 27, dp->vlnt[27] & 0xf,
- 28, dp->vlnt[28] & 0xf,
- 29, dp->vlnt[29] & 0xf,
- 30, dp->vlnt[30] & 0xf,
- 31, dp->vlnt[31] & 0xf));
+ DC_SC_VL_VAL(31_16,
+ 16, dp->vlnt[16] & 0xf,
+ 17, dp->vlnt[17] & 0xf,
+ 18, dp->vlnt[18] & 0xf,
+ 19, dp->vlnt[19] & 0xf,
+ 20, dp->vlnt[20] & 0xf,
+ 21, dp->vlnt[21] & 0xf,
+ 22, dp->vlnt[22] & 0xf,
+ 23, dp->vlnt[23] & 0xf,
+ 24, dp->vlnt[24] & 0xf,
+ 25, dp->vlnt[25] & 0xf,
+ 26, dp->vlnt[26] & 0xf,
+ 27, dp->vlnt[27] & 0xf,
+ 28, dp->vlnt[28] & 0xf,
+ 29, dp->vlnt[29] & 0xf,
+ 30, dp->vlnt[30] & 0xf,
+ 31, dp->vlnt[31] & 0xf));
}
static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
@@ -10335,7 +10617,7 @@ static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
{
if (limit != 0)
dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
- what, (int)limit, idx);
+ what, (int)limit, idx);
}
/* change only the shared limit portion of SendCmGLobalCredit */
@@ -10413,14 +10695,14 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
}
dd_dev_err(dd,
- "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
- which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
+ "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
+ which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
/*
* If this occurs, it is likely there was a credit loss on the link.
* The only recovery from that is a link bounce.
*/
dd_dev_err(dd,
- "Continuing anyway. A credit loss may occur. Suggest a link bounce\n");
+ "Continuing anyway. A credit loss may occur. Suggest a link bounce\n");
}
/*
@@ -10447,13 +10729,15 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
* raise = if the new limit is higher than the current value (may be changed
* earlier in the algorithm), set the new limit to the new value
*/
-static int set_buffer_control(struct hfi1_devdata *dd,
- struct buffer_control *new_bc)
+int set_buffer_control(struct hfi1_pportdata *ppd,
+ struct buffer_control *new_bc)
{
+ struct hfi1_devdata *dd = ppd->dd;
u64 changing_mask, ld_mask, stat_mask;
int change_count;
int i, use_all_mask;
int this_shared_changing;
+ int vl_count = 0, ret;
/*
* A0: add the variable any_shared_limit_changing below and in the
* algorithm above. If removing A0 support, it can be removed.
@@ -10478,7 +10762,6 @@ static int set_buffer_control(struct hfi1_devdata *dd,
#define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
#define NUM_USABLE_VLS 16 /* look at VL15 and less */
-
/* find the new total credits, do sanity check on unused VLs */
for (i = 0; i < OPA_MAX_VLS; i++) {
if (valid_vl(i)) {
@@ -10486,9 +10769,9 @@ static int set_buffer_control(struct hfi1_devdata *dd,
continue;
}
nonzero_msg(dd, i, "dedicated",
- be16_to_cpu(new_bc->vl[i].dedicated));
+ be16_to_cpu(new_bc->vl[i].dedicated));
nonzero_msg(dd, i, "shared",
- be16_to_cpu(new_bc->vl[i].shared));
+ be16_to_cpu(new_bc->vl[i].shared));
new_bc->vl[i].dedicated = 0;
new_bc->vl[i].shared = 0;
}
@@ -10502,8 +10785,10 @@ static int set_buffer_control(struct hfi1_devdata *dd,
*/
memset(changing, 0, sizeof(changing));
memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
- /* NOTE: Assumes that the individual VL bits are adjacent and in
- increasing order */
+ /*
+ * NOTE: Assumes that the individual VL bits are adjacent and in
+ * increasing order
+ */
stat_mask =
SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
changing_mask = 0;
@@ -10517,8 +10802,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
!= cur_bc.vl[i].shared;
if (this_shared_changing)
any_shared_limit_changing = 1;
- if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
- || this_shared_changing) {
+ if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated ||
+ this_shared_changing) {
changing[i] = 1;
changing_mask |= stat_mask;
change_count++;
@@ -10557,7 +10842,7 @@ static int set_buffer_control(struct hfi1_devdata *dd,
}
wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
- "shared");
+ "shared");
if (change_count > 0) {
for (i = 0; i < NUM_USABLE_VLS; i++) {
@@ -10566,7 +10851,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
if (lowering_dedicated[i]) {
set_vl_dedicated(dd, i,
- be16_to_cpu(new_bc->vl[i].dedicated));
+ be16_to_cpu(new_bc->
+ vl[i].dedicated));
cur_bc.vl[i].dedicated =
new_bc->vl[i].dedicated;
}
@@ -10582,7 +10868,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
if (be16_to_cpu(new_bc->vl[i].dedicated) >
be16_to_cpu(cur_bc.vl[i].dedicated))
set_vl_dedicated(dd, i,
- be16_to_cpu(new_bc->vl[i].dedicated));
+ be16_to_cpu(new_bc->
+ vl[i].dedicated));
}
}
@@ -10598,13 +10885,35 @@ static int set_buffer_control(struct hfi1_devdata *dd,
/* finally raise the global shared */
if (be16_to_cpu(new_bc->overall_shared_limit) >
- be16_to_cpu(cur_bc.overall_shared_limit))
+ be16_to_cpu(cur_bc.overall_shared_limit))
set_global_shared(dd,
- be16_to_cpu(new_bc->overall_shared_limit));
+ be16_to_cpu(new_bc->overall_shared_limit));
/* bracket the credit change with a total adjustment */
if (new_total < cur_total)
set_global_limit(dd, new_total);
+
+ /*
+ * Determine the actual number of operational VLS using the number of
+ * dedicated and shared credits for each VL.
+ */
+ if (change_count > 0) {
+ for (i = 0; i < TXE_NUM_DATA_VL; i++)
+ if (be16_to_cpu(new_bc->vl[i].dedicated) > 0 ||
+ be16_to_cpu(new_bc->vl[i].shared) > 0)
+ vl_count++;
+ ppd->actual_vls_operational = vl_count;
+ ret = sdma_map_init(dd, ppd->port - 1, vl_count ?
+ ppd->actual_vls_operational :
+ ppd->vls_operational,
+ NULL);
+ if (ret == 0)
+ ret = pio_map_init(dd, ppd->port - 1, vl_count ?
+ ppd->actual_vls_operational :
+ ppd->vls_operational, NULL);
+ if (ret)
+ return ret;
+ }
return 0;
}
@@ -10696,7 +11005,7 @@ int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
VL_ARB_LOW_PRIO_TABLE_SIZE, t);
break;
case FM_TBL_BUFFER_CONTROL:
- ret = set_buffer_control(ppd->dd, t);
+ ret = set_buffer_control(ppd, t);
break;
case FM_TBL_SC2VLNT:
set_sc2vlnt(ppd->dd, t);
@@ -10846,10 +11155,13 @@ static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
}
rcd->rcvavail_timeout = timeout;
- /* timeout cannot be larger than rcv_intr_timeout_csr which has already
- been verified to be in range */
+ /*
+ * timeout cannot be larger than rcv_intr_timeout_csr which has already
+ * been verified to be in range
+ */
write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
- (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
+ (u64)timeout <<
+ RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
}
void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
@@ -10915,16 +11227,16 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
static u32 encoded_size(u32 size)
{
switch (size) {
- case 4*1024: return 0x1;
- case 8*1024: return 0x2;
- case 16*1024: return 0x3;
- case 32*1024: return 0x4;
- case 64*1024: return 0x5;
- case 128*1024: return 0x6;
- case 256*1024: return 0x7;
- case 512*1024: return 0x8;
- case 1*1024*1024: return 0x9;
- case 2*1024*1024: return 0xa;
+ case 4 * 1024: return 0x1;
+ case 8 * 1024: return 0x2;
+ case 16 * 1024: return 0x3;
+ case 32 * 1024: return 0x4;
+ case 64 * 1024: return 0x5;
+ case 128 * 1024: return 0x6;
+ case 256 * 1024: return 0x7;
+ case 512 * 1024: return 0x8;
+ case 1 * 1024 * 1024: return 0x9;
+ case 2 * 1024 * 1024: return 0xa;
}
return 0x1; /* if invalid, go with the minimum size */
}
@@ -10943,8 +11255,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
/* if the context already enabled, don't do the extra steps */
- if ((op & HFI1_RCVCTRL_CTXT_ENB)
- && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
+ if ((op & HFI1_RCVCTRL_CTXT_ENB) &&
+ !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_phys);
@@ -11018,6 +11330,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
if (dd->rcvhdrtail_dummy_physaddr) {
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
dd->rcvhdrtail_dummy_physaddr);
+ /* Enabling RcvCtxtCtrl.TailUpd is intentional. */
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
}
@@ -11029,15 +11342,20 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_DIS)
- rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+ if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
+ /* See comment on RcvCtxtCtrl.TailUpd above */
+ if (!(op & HFI1_RCVCTRL_CTXT_DIS))
+ rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+ }
if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
- /* In one-packet-per-eager mode, the size comes from
- the RcvArray entry. */
+ /*
+ * In one-packet-per-eager mode, the size comes from
+ * the RcvArray entry.
+ */
rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
}
@@ -11056,19 +11374,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
/* work around sticky RcvCtxtStatus.BlockedRHQFull */
- if (did_enable
- && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
+ if (did_enable &&
+ (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
if (reg != 0) {
dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
- ctxt, reg);
+ ctxt, reg);
read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
- ctxt, reg, reg == 0 ? "not" : "still");
+ ctxt, reg, reg == 0 ? "not" : "still");
}
}
@@ -11079,7 +11397,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
*/
/* set interrupt timeout */
write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
- (u64)rcd->rcvavail_timeout <<
+ (u64)rcd->rcvavail_timeout <<
RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
@@ -11097,28 +11415,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
dd->rcvhdrtail_dummy_physaddr);
}
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
- u64 **cntrp)
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
{
int ret;
u64 val = 0;
if (namep) {
ret = dd->cntrnameslen;
- if (pos != 0) {
- dd_dev_err(dd, "read_cntrs does not support indexing");
- return 0;
- }
*namep = dd->cntrnames;
} else {
const struct cntr_entry *entry;
int i, j;
ret = (dd->ndevcntrs) * sizeof(u64);
- if (pos != 0) {
- dd_dev_err(dd, "read_cntrs does not support indexing");
- return 0;
- }
/* Get the start of the block of counters */
*cntrp = dd->cntrs;
@@ -11147,6 +11456,20 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
dd->cntrs[entry->offset + j] =
val;
}
+ } else if (entry->flags & CNTR_SDMA) {
+ hfi1_cdbg(CNTR,
+ "\t Per SDMA Engine\n");
+ for (j = 0; j < dd->chip_sdma_engines;
+ j++) {
+ val =
+ entry->rw_cntr(entry, dd, j,
+ CNTR_MODE_R, 0);
+ hfi1_cdbg(CNTR,
+ "\t\tRead 0x%llx for %d\n",
+ val, j);
+ dd->cntrs[entry->offset + j] =
+ val;
+ }
} else {
val = entry->rw_cntr(entry, dd,
CNTR_INVALID_VL,
@@ -11163,30 +11486,19 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
/*
* Used by sysfs to create files for hfi stats to read
*/
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
- char **namep, u64 **cntrp)
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
{
int ret;
u64 val = 0;
if (namep) {
- ret = dd->portcntrnameslen;
- if (pos != 0) {
- dd_dev_err(dd, "index not supported");
- return 0;
- }
- *namep = dd->portcntrnames;
+ ret = ppd->dd->portcntrnameslen;
+ *namep = ppd->dd->portcntrnames;
} else {
const struct cntr_entry *entry;
- struct hfi1_pportdata *ppd;
int i, j;
- ret = (dd->nportcntrs) * sizeof(u64);
- if (pos != 0) {
- dd_dev_err(dd, "indexing not supported");
- return 0;
- }
- ppd = (struct hfi1_pportdata *)(dd + 1 + port);
+ ret = ppd->dd->nportcntrs * sizeof(u64);
*cntrp = ppd->cntrs;
for (i = 0; i < PORT_CNTR_LAST; i++) {
@@ -11235,14 +11547,14 @@ static void free_cntrs(struct hfi1_devdata *dd)
for (i = 0; i < dd->num_pports; i++, ppd++) {
kfree(ppd->cntrs);
kfree(ppd->scntrs);
- free_percpu(ppd->ibport_data.rc_acks);
- free_percpu(ppd->ibport_data.rc_qacks);
- free_percpu(ppd->ibport_data.rc_delayed_comp);
+ free_percpu(ppd->ibport_data.rvp.rc_acks);
+ free_percpu(ppd->ibport_data.rvp.rc_qacks);
+ free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
ppd->cntrs = NULL;
ppd->scntrs = NULL;
- ppd->ibport_data.rc_acks = NULL;
- ppd->ibport_data.rc_qacks = NULL;
- ppd->ibport_data.rc_delayed_comp = NULL;
+ ppd->ibport_data.rvp.rc_acks = NULL;
+ ppd->ibport_data.rvp.rc_qacks = NULL;
+ ppd->ibport_data.rvp.rc_delayed_comp = NULL;
}
kfree(dd->portcntrnames);
dd->portcntrnames = NULL;
@@ -11510,11 +11822,13 @@ mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
#define C_MAX_NAME 13 /* 12 chars + one for /0 */
static int init_cntrs(struct hfi1_devdata *dd)
{
- int i, rcv_ctxts, index, j;
+ int i, rcv_ctxts, j;
size_t sz;
char *p;
char name[C_MAX_NAME];
struct hfi1_pportdata *ppd;
+ const char *bit_type_32 = ",32";
+ const int bit_type_32_sz = strlen(bit_type_32);
/* set up the stats timer; the add_timer is done at the end */
setup_timer(&dd->synth_stats_timer, update_synth_timer,
@@ -11527,49 +11841,57 @@ static int init_cntrs(struct hfi1_devdata *dd)
/* size names and determine how many we have*/
dd->ndevcntrs = 0;
sz = 0;
- index = 0;
for (i = 0; i < DEV_CNTR_LAST; i++) {
- hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
if (dev_cntrs[i].flags & CNTR_DISABLED) {
hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
continue;
}
if (dev_cntrs[i].flags & CNTR_VL) {
- hfi1_dbg_early("\tProcessing VL cntr\n");
- dev_cntrs[i].offset = index;
+ dev_cntrs[i].offset = dd->ndevcntrs;
for (j = 0; j < C_VL_COUNT; j++) {
- memset(name, '\0', C_MAX_NAME);
snprintf(name, C_MAX_NAME, "%s%d",
- dev_cntrs[i].name,
- vl_from_idx(j));
+ dev_cntrs[i].name, vl_from_idx(j));
+ sz += strlen(name);
+ /* Add ",32" for 32-bit counters */
+ if (dev_cntrs[i].flags & CNTR_32BIT)
+ sz += bit_type_32_sz;
+ sz++;
+ dd->ndevcntrs++;
+ }
+ } else if (dev_cntrs[i].flags & CNTR_SDMA) {
+ dev_cntrs[i].offset = dd->ndevcntrs;
+ for (j = 0; j < dd->chip_sdma_engines; j++) {
+ snprintf(name, C_MAX_NAME, "%s%d",
+ dev_cntrs[i].name, j);
sz += strlen(name);
+ /* Add ",32" for 32-bit counters */
+ if (dev_cntrs[i].flags & CNTR_32BIT)
+ sz += bit_type_32_sz;
sz++;
- hfi1_dbg_early("\t\t%s\n", name);
dd->ndevcntrs++;
- index++;
}
} else {
- /* +1 for newline */
+ /* +1 for newline. */
sz += strlen(dev_cntrs[i].name) + 1;
+ /* Add ",32" for 32-bit counters */
+ if (dev_cntrs[i].flags & CNTR_32BIT)
+ sz += bit_type_32_sz;
+ dev_cntrs[i].offset = dd->ndevcntrs;
dd->ndevcntrs++;
- dev_cntrs[i].offset = index;
- index++;
- hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
}
}
/* allocate space for the counter values */
- dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+ dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
if (!dd->cntrs)
goto bail;
- dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+ dd->scntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
if (!dd->scntrs)
goto bail;
-
/* allocate space for the counter names */
dd->cntrnameslen = sz;
dd->cntrnames = kmalloc(sz, GFP_KERNEL);
@@ -11577,27 +11899,51 @@ static int init_cntrs(struct hfi1_devdata *dd)
goto bail;
/* fill in the names */
- for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
+ for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) {
if (dev_cntrs[i].flags & CNTR_DISABLED) {
/* Nothing */
- } else {
- if (dev_cntrs[i].flags & CNTR_VL) {
- for (j = 0; j < C_VL_COUNT; j++) {
- memset(name, '\0', C_MAX_NAME);
- snprintf(name, C_MAX_NAME, "%s%d",
- dev_cntrs[i].name,
- vl_from_idx(j));
- memcpy(p, name, strlen(name));
- p += strlen(name);
- *p++ = '\n';
+ } else if (dev_cntrs[i].flags & CNTR_VL) {
+ for (j = 0; j < C_VL_COUNT; j++) {
+ snprintf(name, C_MAX_NAME, "%s%d",
+ dev_cntrs[i].name,
+ vl_from_idx(j));
+ memcpy(p, name, strlen(name));
+ p += strlen(name);
+
+ /* Counter is 32 bits */
+ if (dev_cntrs[i].flags & CNTR_32BIT) {
+ memcpy(p, bit_type_32, bit_type_32_sz);
+ p += bit_type_32_sz;
}
- } else {
- memcpy(p, dev_cntrs[i].name,
- strlen(dev_cntrs[i].name));
- p += strlen(dev_cntrs[i].name);
+
+ *p++ = '\n';
+ }
+ } else if (dev_cntrs[i].flags & CNTR_SDMA) {
+ for (j = 0; j < dd->chip_sdma_engines; j++) {
+ snprintf(name, C_MAX_NAME, "%s%d",
+ dev_cntrs[i].name, j);
+ memcpy(p, name, strlen(name));
+ p += strlen(name);
+
+ /* Counter is 32 bits */
+ if (dev_cntrs[i].flags & CNTR_32BIT) {
+ memcpy(p, bit_type_32, bit_type_32_sz);
+ p += bit_type_32_sz;
+ }
+
*p++ = '\n';
}
- index++;
+ } else {
+ memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name));
+ p += strlen(dev_cntrs[i].name);
+
+ /* Counter is 32 bits */
+ if (dev_cntrs[i].flags & CNTR_32BIT) {
+ memcpy(p, bit_type_32, bit_type_32_sz);
+ p += bit_type_32_sz;
+ }
+
+ *p++ = '\n';
}
}
@@ -11620,31 +11966,31 @@ static int init_cntrs(struct hfi1_devdata *dd)
sz = 0;
dd->nportcntrs = 0;
for (i = 0; i < PORT_CNTR_LAST; i++) {
- hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
if (port_cntrs[i].flags & CNTR_DISABLED) {
hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
continue;
}
if (port_cntrs[i].flags & CNTR_VL) {
- hfi1_dbg_early("\tProcessing VL cntr\n");
port_cntrs[i].offset = dd->nportcntrs;
for (j = 0; j < C_VL_COUNT; j++) {
- memset(name, '\0', C_MAX_NAME);
snprintf(name, C_MAX_NAME, "%s%d",
- port_cntrs[i].name,
- vl_from_idx(j));
+ port_cntrs[i].name, vl_from_idx(j));
sz += strlen(name);
+ /* Add ",32" for 32-bit counters */
+ if (port_cntrs[i].flags & CNTR_32BIT)
+ sz += bit_type_32_sz;
sz++;
- hfi1_dbg_early("\t\t%s\n", name);
dd->nportcntrs++;
}
} else {
- /* +1 for newline */
+ /* +1 for newline */
sz += strlen(port_cntrs[i].name) + 1;
+ /* Add ",32" for 32-bit counters */
+ if (port_cntrs[i].flags & CNTR_32BIT)
+ sz += bit_type_32_sz;
port_cntrs[i].offset = dd->nportcntrs;
dd->nportcntrs++;
- hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
}
}
@@ -11661,18 +12007,30 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (port_cntrs[i].flags & CNTR_VL) {
for (j = 0; j < C_VL_COUNT; j++) {
- memset(name, '\0', C_MAX_NAME);
snprintf(name, C_MAX_NAME, "%s%d",
- port_cntrs[i].name,
- vl_from_idx(j));
+ port_cntrs[i].name, vl_from_idx(j));
memcpy(p, name, strlen(name));
p += strlen(name);
+
+ /* Counter is 32 bits */
+ if (port_cntrs[i].flags & CNTR_32BIT) {
+ memcpy(p, bit_type_32, bit_type_32_sz);
+ p += bit_type_32_sz;
+ }
+
*p++ = '\n';
}
} else {
memcpy(p, port_cntrs[i].name,
strlen(port_cntrs[i].name));
p += strlen(port_cntrs[i].name);
+
+ /* Counter is 32 bits */
+ if (port_cntrs[i].flags & CNTR_32BIT) {
+ memcpy(p, bit_type_32, bit_type_32_sz);
+ p += bit_type_32_sz;
+ }
+
*p++ = '\n';
}
}
@@ -11700,14 +12058,13 @@ bail:
return -ENOMEM;
}
-
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
default:
dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
/* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
@@ -11726,7 +12083,7 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
switch (chip_pstate & 0xf0) {
default:
dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
+ chip_pstate);
/* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
@@ -11792,7 +12149,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
if (new_state != ppd->lstate) {
dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(new_state), new_state);
+ opa_lstate_name(new_state), new_state);
ppd->lstate = new_state;
}
/*
@@ -11851,18 +12208,17 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
{
- static u32 remembered_state = 0xff;
u32 pstate;
u32 ib_pstate;
pstate = read_physical_state(ppd->dd);
ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
- if (remembered_state != ib_pstate) {
+ if (ppd->last_pstate != ib_pstate) {
dd_dev_info(ppd->dd,
- "%s: physical state changed to %s (0x%x), phy 0x%x\n",
- __func__, opa_pstate_name(ib_pstate), ib_pstate,
- pstate);
- remembered_state = ib_pstate;
+ "%s: physical state changed to %s (0x%x), phy 0x%x\n",
+ __func__, opa_pstate_name(ib_pstate), ib_pstate,
+ pstate);
+ ppd->last_pstate = ib_pstate;
}
return ib_pstate;
}
@@ -11906,7 +12262,7 @@ u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
int hfi1_init_ctxt(struct send_context *sc)
{
- if (sc != NULL) {
+ if (sc) {
struct hfi1_devdata *dd = sc->dd;
u64 reg;
u8 set = (sc->type == SC_USER ?
@@ -11963,34 +12319,14 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable)
* In HFI, the mask needs to be 1 to allow interrupts.
*/
if (enable) {
- u64 cce_int_mask;
- const int qsfp1_int_smask = QSFP1_INT % 64;
- const int qsfp2_int_smask = QSFP2_INT % 64;
-
/* enable all interrupts */
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
+ write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
- /*
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
- * the index of the appropriate CSR in the CCEIntMask CSR array
- */
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
- (8*(QSFP1_INT/64)));
- if (dd->hfi1_id) {
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
- write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
- cce_int_mask);
- } else {
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
- write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
- cce_int_mask);
- }
+ init_qsfp_int(dd);
} else {
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+ write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
}
}
@@ -12002,7 +12338,7 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
int i;
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
+ write_csr(dd, CCE_INT_CLEAR + (8 * i), ~(u64)0);
write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
@@ -12037,10 +12373,9 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
struct hfi1_msix_entry *me = dd->msix_entries;
for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (me->arg == NULL) /* => no irq, no affinity */
- break;
- irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
- NULL);
+ if (!me->arg) /* => no irq, no affinity */
+ continue;
+ hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
free_irq(me->msix.vector, me->arg);
}
} else {
@@ -12061,8 +12396,6 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
}
/* clean structures */
- for (i = 0; i < dd->num_msix_entries; i++)
- free_cpumask_var(dd->msix_entries[i].mask);
kfree(dd->msix_entries);
dd->msix_entries = NULL;
dd->num_msix_entries = 0;
@@ -12085,10 +12418,10 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
/* direct the chip source to the given MSI-X interrupt */
m = isrc / 8;
n = isrc % 8;
- reg = read_csr(dd, CCE_INT_MAP + (8*m));
- reg &= ~((u64)0xff << (8*n));
- reg |= ((u64)msix_intr & 0xff) << (8*n);
- write_csr(dd, CCE_INT_MAP + (8*m), reg);
+ reg = read_csr(dd, CCE_INT_MAP + (8 * m));
+ reg &= ~((u64)0xff << (8 * n));
+ reg |= ((u64)msix_intr & 0xff) << (8 * n);
+ write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
static void remap_sdma_interrupts(struct hfi1_devdata *dd,
@@ -12101,12 +12434,12 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress
* SDMAIdle
*/
- remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
+ remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
+ msix_intr);
+ remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
+ msix_intr);
+ remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
+ msix_intr);
}
static int request_intx_irq(struct hfi1_devdata *dd)
@@ -12116,10 +12449,10 @@ static int request_intx_irq(struct hfi1_devdata *dd)
snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
dd->unit);
ret = request_irq(dd->pcidev->irq, general_interrupt,
- IRQF_SHARED, dd->intx_name, dd);
+ IRQF_SHARED, dd->intx_name, dd);
if (ret)
dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
- ret);
+ ret);
else
dd->requested_intx_irq = 1;
return ret;
@@ -12127,70 +12460,20 @@ static int request_intx_irq(struct hfi1_devdata *dd)
static int request_msix_irqs(struct hfi1_devdata *dd)
{
- const struct cpumask *local_mask;
- cpumask_var_t def, rcv;
- bool def_ret, rcv_ret;
int first_general, last_general;
int first_sdma, last_sdma;
int first_rx, last_rx;
- int first_cpu, curr_cpu;
- int rcv_cpu, sdma_cpu;
- int i, ret = 0, possible;
- int ht;
+ int i, ret = 0;
/* calculate the ranges we are going to use */
first_general = 0;
- first_sdma = last_general = first_general + 1;
- first_rx = last_sdma = first_sdma + dd->num_sdma;
+ last_general = first_general + 1;
+ first_sdma = last_general;
+ last_sdma = first_sdma + dd->num_sdma;
+ first_rx = last_sdma;
last_rx = first_rx + dd->n_krcv_queues;
/*
- * Interrupt affinity.
- *
- * non-rcv avail gets a default mask that
- * starts as possible cpus with threads reset
- * and each rcv avail reset.
- *
- * rcv avail gets node relative 1 wrapping back
- * to the node relative 1 as necessary.
- *
- */
- local_mask = cpumask_of_pcibus(dd->pcidev->bus);
- /* if first cpu is invalid, use NUMA 0 */
- if (cpumask_first(local_mask) >= nr_cpu_ids)
- local_mask = topology_core_cpumask(0);
-
- def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
- rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
- if (!def_ret || !rcv_ret)
- goto bail;
- /* use local mask as default */
- cpumask_copy(def, local_mask);
- possible = cpumask_weight(def);
- /* disarm threads from default */
- ht = cpumask_weight(
- topology_sibling_cpumask(cpumask_first(local_mask)));
- for (i = possible/ht; i < possible; i++)
- cpumask_clear_cpu(i, def);
- /* def now has full cores on chosen node*/
- first_cpu = cpumask_first(def);
- if (nr_cpu_ids >= first_cpu)
- first_cpu++;
- curr_cpu = first_cpu;
-
- /* One context is reserved as control context */
- for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
- cpumask_clear_cpu(curr_cpu, def);
- cpumask_set_cpu(curr_cpu, rcv);
- curr_cpu = cpumask_next(curr_cpu, def);
- if (curr_cpu >= nr_cpu_ids)
- break;
- }
- /* def mask has non-rcv, rcv has recv mask */
- rcv_cpu = cpumask_first(rcv);
- sdma_cpu = cpumask_first(def);
-
- /*
* Sanity check - the code expects all SDMA chip source
* interrupts to be in the same CSR, starting at bit 0. Verify
* that this is true by checking the bit location of the start.
@@ -12215,6 +12498,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
snprintf(me->name, sizeof(me->name),
DRIVER_NAME "_%d", dd->unit);
err_info = "general";
+ me->type = IRQ_GENERAL;
} else if (first_sdma <= i && i < last_sdma) {
idx = i - first_sdma;
sde = &dd->per_sdma[idx];
@@ -12224,6 +12508,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
DRIVER_NAME "_%d sdma%d", dd->unit, idx);
err_info = "sdma";
remap_sdma_interrupts(dd, idx, i);
+ me->type = IRQ_SDMA;
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = dd->rcd[idx];
@@ -12234,9 +12519,9 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
* Set the interrupt register and mask for this
* context's interrupt.
*/
- rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START+idx) % 64);
+ ((IS_RCVAVAIL_START + idx) % 64);
handler = receive_context_interrupt;
thread = receive_context_thread;
arg = rcd;
@@ -12244,25 +12529,27 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
err_info = "receive context";
remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+ me->type = IRQ_RCVCTXT;
} else {
/* not in our expected range - complain, then
- ignore it */
+ * ignore it
+ */
dd_dev_err(dd,
- "Unexpected extra MSI-X interrupt %d\n", i);
+ "Unexpected extra MSI-X interrupt %d\n", i);
continue;
}
/* no argument, no interrupt */
- if (arg == NULL)
+ if (!arg)
continue;
/* make sure the name is terminated */
- me->name[sizeof(me->name)-1] = 0;
+ me->name[sizeof(me->name) - 1] = 0;
ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
- me->name, arg);
+ me->name, arg);
if (ret) {
dd_dev_err(dd,
- "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
- err_info, me->msix.vector, idx, ret);
+ "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
+ err_info, me->msix.vector, idx, ret);
return ret;
}
/*
@@ -12271,52 +12558,13 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
*/
me->arg = arg;
- if (!zalloc_cpumask_var(
- &dd->msix_entries[i].mask,
- GFP_KERNEL))
- goto bail;
- if (handler == sdma_interrupt) {
- dd_dev_info(dd, "sdma engine %d cpu %d\n",
- sde->this_idx, sdma_cpu);
- sde->cpu = sdma_cpu;
- cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
- sdma_cpu = cpumask_next(sdma_cpu, def);
- if (sdma_cpu >= nr_cpu_ids)
- sdma_cpu = cpumask_first(def);
- } else if (handler == receive_context_interrupt) {
- dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
- (rcd->ctxt == HFI1_CTRL_CTXT) ?
- cpumask_first(def) : rcv_cpu);
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- /* map to first default */
- cpumask_set_cpu(cpumask_first(def),
- dd->msix_entries[i].mask);
- } else {
- cpumask_set_cpu(rcv_cpu,
- dd->msix_entries[i].mask);
- rcv_cpu = cpumask_next(rcv_cpu, rcv);
- if (rcv_cpu >= nr_cpu_ids)
- rcv_cpu = cpumask_first(rcv);
- }
- } else {
- /* otherwise first def */
- dd_dev_info(dd, "%s cpu %d\n",
- err_info, cpumask_first(def));
- cpumask_set_cpu(
- cpumask_first(def), dd->msix_entries[i].mask);
- }
- irq_set_affinity_hint(
- dd->msix_entries[i].msix.vector,
- dd->msix_entries[i].mask);
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret)
+ dd_dev_err(dd,
+ "unable to pin IRQ %d\n", ret);
}
-out:
- free_cpumask_var(def);
- free_cpumask_var(rcv);
return ret;
-bail:
- ret = -ENOMEM;
- goto out;
}
/*
@@ -12333,7 +12581,7 @@ static void reset_interrupts(struct hfi1_devdata *dd)
/* all chip interrupts map to MSI-X 0 */
for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
- write_csr(dd, CCE_INT_MAP + (8*i), 0);
+ write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
static int set_up_interrupts(struct hfi1_devdata *dd)
@@ -12442,7 +12690,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
else
- num_kernel_contexts = num_online_nodes();
+ num_kernel_contexts = num_online_nodes() + 1;
num_kernel_contexts =
max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
/*
@@ -12483,13 +12731,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->first_user_ctxt = num_kernel_contexts;
+ dd->num_user_contexts = num_user_contexts;
dd->freectxts = num_user_contexts;
dd_dev_info(dd,
- "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
- (int)dd->chip_rcv_contexts,
- (int)dd->num_rcv_contexts,
- (int)dd->n_krcv_queues,
- (int)dd->num_rcv_contexts - dd->n_krcv_queues);
+ "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
+ (int)dd->chip_rcv_contexts,
+ (int)dd->num_rcv_contexts,
+ (int)dd->n_krcv_queues,
+ (int)dd->num_rcv_contexts - dd->n_krcv_queues);
/*
* Receive array allocation:
@@ -12515,8 +12764,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
dd->rcv_entries.group_size;
dd_dev_info(dd,
- "RcvArray group count too high, change to %u\n",
- dd->rcv_entries.ngroups);
+ "RcvArray group count too high, change to %u\n",
+ dd->rcv_entries.ngroups);
dd->rcv_entries.nctxt_extra = 0;
}
/*
@@ -12582,7 +12831,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* CceIntMap */
for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
- write_csr(dd, CCE_INT_MAP+(8*i), 0);
+ write_csr(dd, CCE_INT_MAP + (8 * i), 0);
/* SendCtxtCreditReturnAddr */
for (i = 0; i < dd->chip_send_contexts; i++)
@@ -12590,8 +12839,10 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* PIO Send buffers */
/* SDMA Send buffers */
- /* These are not normally read, and (presently) have no method
- to be read, so are not pre-initialized */
+ /*
+ * These are not normally read, and (presently) have no method
+ * to be read, so are not pre-initialized
+ */
/* RcvHdrAddr */
/* RcvHdrTailAddr */
@@ -12600,13 +12851,13 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
- write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
+ write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), 0);
}
/* RcvArray */
for (i = 0; i < dd->chip_rcv_array_count; i++)
- write_csr(dd, RCV_ARRAY + (8*i),
- RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+ write_csr(dd, RCV_ARRAY + (8 * i),
+ RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
/* RcvQPMapTable */
for (i = 0; i < 32; i++)
@@ -12638,8 +12889,8 @@ static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
return;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
- "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
- status_bits, reg & status_bits);
+ "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
+ status_bits, reg & status_bits);
return;
}
udelay(1);
@@ -12671,7 +12922,7 @@ static void reset_cce_csrs(struct hfi1_devdata *dd)
for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
- CCE_MSIX_TABLE_UPPER_RESETCSR);
+ CCE_MSIX_TABLE_UPPER_RESETCSR);
}
for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
/* CCE_MSIX_PBA read-only */
@@ -12691,91 +12942,6 @@ static void reset_cce_csrs(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
}
-/* set ASIC CSRs to chip reset defaults */
-static void reset_asic_csrs(struct hfi1_devdata *dd)
-{
- int i;
-
- /*
- * If the HFIs are shared between separate nodes or VMs,
- * then more will need to be done here. One idea is a module
- * parameter that returns early, letting the first power-on or
- * a known first load do the reset and blocking all others.
- */
-
- if (!(dd->flags & HFI1_DO_INIT_ASIC))
- return;
-
- if (dd->icode != ICODE_FPGA_EMULATION) {
- /* emulation does not have an SBus - leave these alone */
- /*
- * All writes to ASIC_CFG_SBUS_REQUEST do something.
- * Notes:
- * o The reset is not zero if aimed at the core. See the
- * SBus documentation for details.
- * o If the SBus firmware has been updated (e.g. by the BIOS),
- * will the reset revert that?
- */
- /* ASIC_CFG_SBUS_REQUEST leave alone */
- write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
- }
- /* ASIC_SBUS_RESULT read-only */
- write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
- for (i = 0; i < ASIC_NUM_SCRATCH; i++)
- write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
- write_csr(dd, ASIC_CFG_MUTEX, 0); /* this will clear it */
-
- /* We might want to retain this state across FLR if we ever use it */
- write_csr(dd, ASIC_CFG_DRV_STR, 0);
-
- /* ASIC_CFG_THERM_POLL_EN leave alone */
- /* ASIC_STS_THERM read-only */
- /* ASIC_CFG_RESET leave alone */
-
- write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
- /* ASIC_PCIE_SD_HOST_STATUS read-only */
- write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
- write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
- /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
- write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
- /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
- /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
- for (i = 0; i < 16; i++)
- write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
-
- /* ASIC_GPIO_IN read-only */
- write_csr(dd, ASIC_GPIO_OE, 0);
- write_csr(dd, ASIC_GPIO_INVERT, 0);
- write_csr(dd, ASIC_GPIO_OUT, 0);
- write_csr(dd, ASIC_GPIO_MASK, 0);
- /* ASIC_GPIO_STATUS read-only */
- write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
- /* ASIC_GPIO_FORCE leave alone */
-
- /* ASIC_QSFP1_IN read-only */
- write_csr(dd, ASIC_QSFP1_OE, 0);
- write_csr(dd, ASIC_QSFP1_INVERT, 0);
- write_csr(dd, ASIC_QSFP1_OUT, 0);
- write_csr(dd, ASIC_QSFP1_MASK, 0);
- /* ASIC_QSFP1_STATUS read-only */
- write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
- /* ASIC_QSFP1_FORCE leave alone */
-
- /* ASIC_QSFP2_IN read-only */
- write_csr(dd, ASIC_QSFP2_OE, 0);
- write_csr(dd, ASIC_QSFP2_INVERT, 0);
- write_csr(dd, ASIC_QSFP2_OUT, 0);
- write_csr(dd, ASIC_QSFP2_MASK, 0);
- /* ASIC_QSFP2_STATUS read-only */
- write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
- /* ASIC_QSFP2_FORCE leave alone */
-
- write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
- /* this also writes a NOP command, clearing paging mode */
- write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
- write_csr(dd, ASIC_EEP_DATA, 0);
-}
-
/* set MISC CSRs to chip reset defaults */
static void reset_misc_csrs(struct hfi1_devdata *dd)
{
@@ -12786,8 +12952,10 @@ static void reset_misc_csrs(struct hfi1_devdata *dd)
write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
}
- /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
- only be written 128-byte chunks */
+ /*
+ * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
+ * only be written 128-byte chunks
+ */
/* init RSA engine to clear lingering errors */
write_csr(dd, MISC_CFG_RSA_CMD, 1);
write_csr(dd, MISC_CFG_RSA_MU, 0);
@@ -12843,18 +13011,17 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
write_csr(dd, SEND_ERR_CLEAR, ~0ull);
/* SEND_ERR_FORCE read-only */
for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
- write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
+ write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
- write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
- for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
- write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
+ write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
+ for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+ write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
- write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
+ write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
- write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
+ write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0);
write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
- write_csr(dd, SEND_CM_GLOBAL_CREDIT,
- SEND_CM_GLOBAL_CREDIT_RESETCSR);
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR);
/* SEND_CM_CREDIT_USED_STATUS read-only */
write_csr(dd, SEND_CM_TIMER_CTRL, 0);
write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
@@ -12862,7 +13029,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
for (i = 0; i < TXE_NUM_DATA_VL; i++)
- write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+ write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
write_csr(dd, SEND_CM_CREDIT_VL15, 0);
/* SEND_CM_CREDIT_USED_VL read-only */
/* SEND_CM_CREDIT_USED_VL15 read-only */
@@ -12948,8 +13115,8 @@ static void init_rbufs(struct hfi1_devdata *dd)
*/
if (count++ > 500) {
dd_dev_err(dd,
- "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
- __func__, reg);
+ "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
+ __func__, reg);
break;
}
udelay(2); /* do not busy-wait the CSR */
@@ -12978,8 +13145,8 @@ static void init_rbufs(struct hfi1_devdata *dd)
/* give up after 100us - slowest possible at 33MHz is 73us */
if (count++ > 50) {
dd_dev_err(dd,
- "%s: RcvStatus.RxRbufInit not set, continuing\n",
- __func__);
+ "%s: RcvStatus.RxRbufInit not set, continuing\n",
+ __func__);
break;
}
}
@@ -13005,7 +13172,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_VL15, 0);
/* this is a clear-down */
write_csr(dd, RCV_ERR_INFO,
- RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
+ RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
/* RCV_ERR_STATUS read-only */
write_csr(dd, RCV_ERR_MASK, 0);
write_csr(dd, RCV_ERR_CLEAR, ~0ull);
@@ -13051,8 +13218,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
/* RCV_EGR_OFFSET_TAIL read-only */
for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
- write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
- 0);
+ write_uctxt_csr(dd, i,
+ RCV_TID_FLOW_TABLE + (8 * j), 0);
}
}
}
@@ -13154,7 +13321,7 @@ static void init_chip(struct hfi1_devdata *dd)
write_csr(dd, RCV_CTXT_CTRL, 0);
/* mask all interrupt sources */
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+ write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
/*
* DC Reset: do a full DC reset before the register clear.
@@ -13163,7 +13330,7 @@ static void init_chip(struct hfi1_devdata *dd)
* across the clear.
*/
write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
- (void) read_csr(dd, CCE_DC_CTRL);
+ (void)read_csr(dd, CCE_DC_CTRL);
if (use_flr) {
/*
@@ -13184,22 +13351,19 @@ static void init_chip(struct hfi1_devdata *dd)
hfi1_pcie_flr(dd);
restore_pci_variables(dd);
}
-
- reset_asic_csrs(dd);
} else {
dd_dev_info(dd, "Resetting CSRs with writes\n");
reset_cce_csrs(dd);
reset_txe_csrs(dd);
reset_rxe_csrs(dd);
- reset_asic_csrs(dd);
reset_misc_csrs(dd);
}
/* clear the DC reset */
write_csr(dd, CCE_DC_CTRL, 0);
/* Set the LED off */
- if (is_ax(dd))
- setextled(dd, 0);
+ setextled(dd, 0);
+
/*
* Clear the QSFP reset.
* An FLR enforces a 0 on all out pins. The driver does not touch
@@ -13212,6 +13376,7 @@ static void init_chip(struct hfi1_devdata *dd)
*/
write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
+ init_chip_resources(dd);
}
static void init_early_variables(struct hfi1_devdata *dd)
@@ -13252,12 +13417,12 @@ static void init_kdeth_qp(struct hfi1_devdata *dd)
kdeth_qp = DEFAULT_KDETH_QP;
write_csr(dd, SEND_BTH_QP,
- (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
- << SEND_BTH_QP_KDETH_QP_SHIFT);
+ (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+ SEND_BTH_QP_KDETH_QP_SHIFT);
write_csr(dd, RCV_BTH_QP,
- (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
- << RCV_BTH_QP_KDETH_QP_SHIFT);
+ (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+ RCV_BTH_QP_KDETH_QP_SHIFT);
}
/**
@@ -13382,22 +13547,21 @@ static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
/* add rule0 */
write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
- RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
- << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
- 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
+ RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK <<
+ RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
+ 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
- LRH_BTH_MATCH_OFFSET
- << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
- LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
- LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
- ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
- QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
- ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
+ LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
+ LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
+ LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
+ ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
+ QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
+ ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
- LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
- LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
- LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
- LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
+ LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
+ LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
+ LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
+ LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
/* Enable RSM */
add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
kfree(rsmmap);
@@ -13415,9 +13579,8 @@ static void init_rxe(struct hfi1_devdata *dd)
/* enable all receive errors */
write_csr(dd, RCV_ERR_MASK, ~0ull);
/* setup QPN map table - start where VL15 context leaves off */
- init_qos(
- dd,
- dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
+ init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ?
+ MIN_KERNEL_KCTXTS : 0);
/*
* make sure RcvCtrl.RcvWcb <= PCIe Device Control
* Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
@@ -13454,36 +13617,33 @@ static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
u32 csr0to3, u32 csr4to7)
{
write_csr(dd, csr0to3,
- 0ull <<
- SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
- | 1ull <<
- SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
- | 2ull * cu <<
- SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
- | 4ull * cu <<
- SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
+ 0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT |
+ 1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT |
+ 2ull * cu <<
+ SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT |
+ 4ull * cu <<
+ SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
write_csr(dd, csr4to7,
- 8ull * cu <<
- SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
- | 16ull * cu <<
- SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
- | 32ull * cu <<
- SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
- | 64ull * cu <<
- SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
-
+ 8ull * cu <<
+ SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT |
+ 16ull * cu <<
+ SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT |
+ 32ull * cu <<
+ SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT |
+ 64ull * cu <<
+ SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
}
static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
{
assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
- SEND_CM_LOCAL_AU_TABLE4_TO7);
+ SEND_CM_LOCAL_AU_TABLE4_TO7);
}
void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
{
assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
- SEND_CM_REMOTE_AU_TABLE4_TO7);
+ SEND_CM_REMOTE_AU_TABLE4_TO7);
}
static void init_txe(struct hfi1_devdata *dd)
@@ -13586,9 +13746,9 @@ int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
int ret = 0;
u64 reg;
- if (ctxt < dd->num_rcv_contexts)
+ if (ctxt < dd->num_rcv_contexts) {
rcd = dd->rcd[ctxt];
- else {
+ } else {
ret = -EINVAL;
goto done;
}
@@ -13614,9 +13774,9 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
int ret = 0;
u64 reg;
- if (ctxt < dd->num_rcv_contexts)
+ if (ctxt < dd->num_rcv_contexts) {
rcd = dd->rcd[ctxt];
- else {
+ } else {
ret = -EINVAL;
goto done;
}
@@ -13639,24 +13799,26 @@ done:
*/
void hfi1_start_cleanup(struct hfi1_devdata *dd)
{
+ aspm_exit(dd);
free_cntrs(dd);
free_rcverr(dd);
clean_up_interrupts(dd);
+ finish_chip_resources(dd);
}
#define HFI_BASE_GUID(dev) \
((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
/*
- * Certain chip functions need to be initialized only once per asic
- * instead of per-device. This function finds the peer device and
- * checks whether that chip initialization needs to be done by this
- * device.
+ * Information can be shared between the two HFIs on the same ASIC
+ * in the same OS. This function finds the peer device and sets
+ * up a shared structure.
*/
-static void asic_should_init(struct hfi1_devdata *dd)
+static int init_asic_data(struct hfi1_devdata *dd)
{
unsigned long flags;
struct hfi1_devdata *tmp, *peer = NULL;
+ int ret = 0;
spin_lock_irqsave(&hfi1_devs_lock, flags);
/* Find our peer device */
@@ -13668,13 +13830,21 @@ static void asic_should_init(struct hfi1_devdata *dd)
}
}
- /*
- * "Claim" the ASIC for initialization if it hasn't been
- " "claimed" yet.
- */
- if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
- dd->flags |= HFI1_DO_INIT_ASIC;
+ if (peer) {
+ dd->asic_data = peer->asic_data;
+ } else {
+ dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+ if (!dd->asic_data) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ mutex_init(&dd->asic_data->asic_resource_mutex);
+ }
+ dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
+
+done:
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ return ret;
}
/*
@@ -13694,7 +13864,7 @@ static int obtain_boardname(struct hfi1_devdata *dd)
ret = read_hfi1_efi_var(dd, "description", &size,
(void **)&dd->boardname);
if (ret) {
- dd_dev_err(dd, "Board description not found\n");
+ dd_dev_info(dd, "Board description not found\n");
/* use generic description */
dd->boardname = kstrdup(generic, GFP_KERNEL);
if (!dd->boardname)
@@ -13703,6 +13873,50 @@ static int obtain_boardname(struct hfi1_devdata *dd)
return 0;
}
+/*
+ * Check the interrupt registers to make sure that they are mapped correctly.
+ * It is intended to help user identify any mismapping by VMM when the driver
+ * is running in a VM. This function should only be called before interrupt
+ * is set up properly.
+ *
+ * Return 0 on success, -EINVAL on failure.
+ */
+static int check_int_registers(struct hfi1_devdata *dd)
+{
+ u64 reg;
+ u64 all_bits = ~(u64)0;
+ u64 mask;
+
+ /* Clear CceIntMask[0] to avoid raising any interrupts */
+ mask = read_csr(dd, CCE_INT_MASK);
+ write_csr(dd, CCE_INT_MASK, 0ull);
+ reg = read_csr(dd, CCE_INT_MASK);
+ if (reg)
+ goto err_exit;
+
+ /* Clear all interrupt status bits */
+ write_csr(dd, CCE_INT_CLEAR, all_bits);
+ reg = read_csr(dd, CCE_INT_STATUS);
+ if (reg)
+ goto err_exit;
+
+ /* Set all interrupt status bits */
+ write_csr(dd, CCE_INT_FORCE, all_bits);
+ reg = read_csr(dd, CCE_INT_STATUS);
+ if (reg != all_bits)
+ goto err_exit;
+
+ /* Restore the interrupt mask */
+ write_csr(dd, CCE_INT_CLEAR, all_bits);
+ write_csr(dd, CCE_INT_MASK, mask);
+
+ return 0;
+err_exit:
+ write_csr(dd, CCE_INT_MASK, mask);
+ dd_dev_err(dd, "Interrupt registers not properly mapped by VMM\n");
+ return -EINVAL;
+}
+
/**
* Allocate and initialize the device structure for the hfi.
* @dev: the pci_dev for hfi1_ib device
@@ -13727,9 +13941,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"RTL FPGA emulation",
"Functional simulator"
};
+ struct pci_dev *parent = pdev->bus->self;
- dd = hfi1_alloc_devdata(pdev,
- NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
+ dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+ sizeof(struct hfi1_pportdata));
if (IS_ERR(dd))
goto bail;
ppd = dd->pport;
@@ -13750,8 +13965,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* link width active is 0 when link is down */
/* link width downgrade active is 0 when link is down */
- if (num_vls < HFI1_MIN_VLS_SUPPORTED
- || num_vls > HFI1_MAX_VLS_SUPPORTED) {
+ if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
+ num_vls > HFI1_MAX_VLS_SUPPORTED) {
hfi1_early_err(&pdev->dev,
"Invalid num_vls %u, using %u VLs\n",
num_vls, HFI1_MAX_VLS_SUPPORTED);
@@ -13759,6 +13974,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
}
ppd->vls_supported = num_vls;
ppd->vls_operational = ppd->vls_supported;
+ ppd->actual_vls_operational = ppd->vls_supported;
/* Set the default MTU. */
for (vl = 0; vl < num_vls; vl++)
dd->vld[vl].mtu = hfi1_max_mtu;
@@ -13778,6 +13994,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* start in offline */
ppd->host_link_state = HLS_DN_OFFLINE;
init_vl_arb_caches(ppd);
+ ppd->last_pstate = 0xff; /* invalid value */
}
dd->link_default = HLS_DN_POLL;
@@ -13803,8 +14020,21 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
& CCE_REVISION_CHIP_REV_MINOR_MASK;
- /* obtain the hardware ID - NOT related to unit, which is a
- software enumeration */
+ /*
+ * Check interrupt registers mapping if the driver has no access to
+ * the upstream component. In this case, it is likely that the driver
+ * is running in a VM.
+ */
+ if (!parent) {
+ ret = check_int_registers(dd);
+ if (ret)
+ goto bail_cleanup;
+ }
+
+ /*
+ * obtain the hardware ID - NOT related to unit, which is a
+ * software enumeration
+ */
reg = read_csr(dd, CCE_REVISION2);
dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
& CCE_REVISION2_HFI_ID_MASK;
@@ -13812,8 +14042,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
- dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
- (int)dd->irev);
+ dd->icode < ARRAY_SIZE(inames) ?
+ inames[dd->icode] : "unknown", (int)dd->irev);
/* speeds the hardware can support */
dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
@@ -13842,6 +14072,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
num_vls, dd->chip_sdma_engines);
num_vls = dd->chip_sdma_engines;
ppd->vls_supported = dd->chip_sdma_engines;
+ ppd->vls_operational = ppd->vls_supported;
}
/*
@@ -13863,8 +14094,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* needs to be done before we look for the peer device */
read_guid(dd);
- /* should this device init the ASIC block? */
- asic_should_init(dd);
+ /* set up shared ASIC data with peer device */
+ ret = init_asic_data(dd);
+ if (ret)
+ goto bail_cleanup;
/* obtain chip sizes, reset chip CSRs */
init_chip(dd);
@@ -13874,6 +14107,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /* Needs to be called before hfi1_firmware_init */
+ get_platform_config(dd);
+
/* read in firmware */
ret = hfi1_firmware_init(dd);
if (ret)
@@ -13925,6 +14161,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* set up KDETH QP prefix in both RX and TX CSRs */
init_kdeth_qp(dd);
+ ret = hfi1_dev_affinity_init(dd);
+ if (ret)
+ goto bail_cleanup;
+
/* send contexts must be set up before receive contexts */
ret = init_send_contexts(dd);
if (ret)
@@ -14022,7 +14262,6 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
return (u16)delta_cycles;
}
-
/**
* create_pbc - build a pbc for transmission
* @flags: special case flags or-ed in built pbc
@@ -14078,10 +14317,15 @@ static int thermal_init(struct hfi1_devdata *dd)
int ret = 0;
if (dd->icode != ICODE_RTL_SILICON ||
- !(dd->flags & HFI1_DO_INIT_ASIC))
+ check_chip_resource(dd, CR_THERM_INIT, NULL))
return ret;
- acquire_hw_mutex(dd);
+ ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+ if (ret) {
+ THERM_FAILURE(dd, ret, "Acquire SBus");
+ return ret;
+ }
+
dd_dev_info(dd, "Initializing thermal sensor\n");
/* Disable polling of thermal readings */
write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
@@ -14128,8 +14372,14 @@ static int thermal_init(struct hfi1_devdata *dd)
/* Enable polling of thermal readings */
write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
+
+ /* Set initialized flag */
+ ret = acquire_chip_resource(dd, CR_THERM_INIT, 0);
+ if (ret)
+ THERM_FAILURE(dd, ret, "Unable to set thermal init flag");
+
done:
- release_hw_mutex(dd);
+ release_chip_resource(dd, CR_SBUS);
return ret;
}
@@ -14144,7 +14394,7 @@ static void handle_temp_err(struct hfi1_devdata *dd)
dd_dev_emerg(dd,
"Critical temperature reached! Forcing device into freeze mode!\n");
dd->flags |= HFI1_FORCED_FREEZE;
- start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
+ start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
/*
* Shut DC down as much and as quickly as possible.
*
@@ -14158,8 +14408,8 @@ static void handle_temp_err(struct hfi1_devdata *dd)
*/
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;
- set_physical_link_state(dd, PLS_OFFLINE |
- (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
+ set_physical_link_state(dd, (OPA_LINKDOWN_REASON_SMA_DISABLED << 8) |
+ PLS_OFFLINE);
/*
* Step 2: Shutdown LCB and 8051
* After shutdown, do not restore DC_CFG_RESET value.
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h
index 5b375ddc345d..4f3b878e43eb 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/staging/rdma/hfi1/chip.h
@@ -1,14 +1,13 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -79,8 +76,10 @@
#define PIO_CMASK 0x7ff /* counter mask for free and fill counters */
#define MAX_EAGER_ENTRIES 2048 /* max receive eager entries */
#define MAX_TID_PAIR_ENTRIES 1024 /* max receive expected pairs */
-/* Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
- at 64 bytes for all generation one devices */
+/*
+ * Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
+ * at 64 bytes for all generation one devices
+ */
#define CM_VAU 3
/* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
#define CM_GLOBAL_CREDITS 0x940
@@ -93,15 +92,15 @@
#define TXE_PIO_SEND (TXE + TXE_PIO_SEND_OFFSET)
/* PBC flags */
-#define PBC_INTR (1ull << 31)
+#define PBC_INTR BIT_ULL(31)
#define PBC_DC_INFO_SHIFT (30)
-#define PBC_DC_INFO (1ull << PBC_DC_INFO_SHIFT)
-#define PBC_TEST_EBP (1ull << 29)
-#define PBC_PACKET_BYPASS (1ull << 28)
-#define PBC_CREDIT_RETURN (1ull << 25)
-#define PBC_INSERT_BYPASS_ICRC (1ull << 24)
-#define PBC_TEST_BAD_ICRC (1ull << 23)
-#define PBC_FECN (1ull << 22)
+#define PBC_DC_INFO BIT_ULL(PBC_DC_INFO_SHIFT)
+#define PBC_TEST_EBP BIT_ULL(29)
+#define PBC_PACKET_BYPASS BIT_ULL(28)
+#define PBC_CREDIT_RETURN BIT_ULL(25)
+#define PBC_INSERT_BYPASS_ICRC BIT_ULL(24)
+#define PBC_TEST_BAD_ICRC BIT_ULL(23)
+#define PBC_FECN BIT_ULL(22)
/* PbcInsertHcrc field settings */
#define PBC_IHCRC_LKDETH 0x0 /* insert @ local KDETH offset */
@@ -212,7 +211,7 @@
#define PLS_CONFIGPHY_DEBOUCE 0x40
#define PLS_CONFIGPHY_ESTCOMM 0x41
#define PLS_CONFIGPHY_ESTCOMM_TXRX_HUNT 0x42
-#define PLS_CONFIGPHY_ESTcOMM_LOCAL_COMPLETE 0x43
+#define PLS_CONFIGPHY_ESTCOMM_LOCAL_COMPLETE 0x43
#define PLS_CONFIGPHY_OPTEQ 0x44
#define PLS_CONFIGPHY_OPTEQ_OPTIMIZING 0x44
#define PLS_CONFIGPHY_OPTEQ_LOCAL_COMPLETE 0x45
@@ -242,36 +241,37 @@
#define HCMD_SUCCESS 2
/* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR - error flags */
-#define SPICO_ROM_FAILED (1 << 0)
-#define UNKNOWN_FRAME (1 << 1)
-#define TARGET_BER_NOT_MET (1 << 2)
-#define FAILED_SERDES_INTERNAL_LOOPBACK (1 << 3)
-#define FAILED_SERDES_INIT (1 << 4)
-#define FAILED_LNI_POLLING (1 << 5)
-#define FAILED_LNI_DEBOUNCE (1 << 6)
-#define FAILED_LNI_ESTBCOMM (1 << 7)
-#define FAILED_LNI_OPTEQ (1 << 8)
-#define FAILED_LNI_VERIFY_CAP1 (1 << 9)
-#define FAILED_LNI_VERIFY_CAP2 (1 << 10)
-#define FAILED_LNI_CONFIGLT (1 << 11)
+#define SPICO_ROM_FAILED BIT(0)
+#define UNKNOWN_FRAME BIT(1)
+#define TARGET_BER_NOT_MET BIT(2)
+#define FAILED_SERDES_INTERNAL_LOOPBACK BIT(3)
+#define FAILED_SERDES_INIT BIT(4)
+#define FAILED_LNI_POLLING BIT(5)
+#define FAILED_LNI_DEBOUNCE BIT(6)
+#define FAILED_LNI_ESTBCOMM BIT(7)
+#define FAILED_LNI_OPTEQ BIT(8)
+#define FAILED_LNI_VERIFY_CAP1 BIT(9)
+#define FAILED_LNI_VERIFY_CAP2 BIT(10)
+#define FAILED_LNI_CONFIGLT BIT(11)
+#define HOST_HANDSHAKE_TIMEOUT BIT(12)
#define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
| FAILED_LNI_VERIFY_CAP1 \
| FAILED_LNI_VERIFY_CAP2 \
- | FAILED_LNI_CONFIGLT)
+ | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
/* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
-#define HOST_REQ_DONE (1 << 0)
-#define BC_PWR_MGM_MSG (1 << 1)
-#define BC_SMA_MSG (1 << 2)
-#define BC_BCC_UNKOWN_MSG (1 << 3)
-#define BC_IDLE_UNKNOWN_MSG (1 << 4)
-#define EXT_DEVICE_CFG_REQ (1 << 5)
-#define VERIFY_CAP_FRAME (1 << 6)
-#define LINKUP_ACHIEVED (1 << 7)
-#define LINK_GOING_DOWN (1 << 8)
-#define LINK_WIDTH_DOWNGRADED (1 << 9)
+#define HOST_REQ_DONE BIT(0)
+#define BC_PWR_MGM_MSG BIT(1)
+#define BC_SMA_MSG BIT(2)
+#define BC_BCC_UNKNOWN_MSG BIT(3)
+#define BC_IDLE_UNKNOWN_MSG BIT(4)
+#define EXT_DEVICE_CFG_REQ BIT(5)
+#define VERIFY_CAP_FRAME BIT(6)
+#define LINKUP_ACHIEVED BIT(7)
+#define LINK_GOING_DOWN BIT(8)
+#define LINK_WIDTH_DOWNGRADED BIT(9)
/* DC_DC8051_CFG_EXT_DEV_1.REQ_TYPE - 8051 host requests */
#define HREQ_LOAD_CONFIG 0x01
@@ -335,14 +335,14 @@
* the CSR fields hold multiples of this value.
*/
#define RCV_SHIFT 3
-#define RCV_INCREMENT (1 << RCV_SHIFT)
+#define RCV_INCREMENT BIT(RCV_SHIFT)
/*
* Receive header queue entry increment - the CSR holds multiples of
* this value.
*/
#define HDRQ_SIZE_SHIFT 5
-#define HDRQ_INCREMENT (1 << HDRQ_SIZE_SHIFT)
+#define HDRQ_INCREMENT BIT(HDRQ_SIZE_SHIFT)
/*
* Freeze handling flags
@@ -371,6 +371,9 @@
#define NUM_LANE_FIELDS 0x8
/* 8051 general register Field IDs */
+#define LINK_OPTIMIZATION_SETTINGS 0x00
+#define LINK_TUNING_PARAMETERS 0x02
+#define DC_HOST_COMM_SETTINGS 0x03
#define TX_SETTINGS 0x06
#define VERIFY_CAP_LOCAL_PHY 0x07
#define VERIFY_CAP_LOCAL_FABRIC 0x08
@@ -387,6 +390,10 @@
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
+/* 8051 lane specific register field IDs */
+#define TX_EQ_SETTINGS 0x00
+#define CHANNEL_LOSS_SETTINGS 0x05
+
/* Lane ID for general configuration registers */
#define GENERAL_CONFIG 4
@@ -511,8 +518,10 @@ enum {
#define LCB_CRC_48B 0x2 /* 48b CRC */
#define LCB_CRC_12B_16B_PER_LANE 0x3 /* 12b-16b per lane CRC */
-/* the following enum is (almost) a copy/paste of the definition
- * in the OPA spec, section 20.2.2.6.8 (PortInfo) */
+/*
+ * the following enum is (almost) a copy/paste of the definition
+ * in the OPA spec, section 20.2.2.6.8 (PortInfo)
+ */
enum {
PORT_LTP_CRC_MODE_NONE = 0,
PORT_LTP_CRC_MODE_14 = 1, /* 14-bit LTP CRC mode (optional) */
@@ -614,6 +623,8 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
#define NUM_PCIE_SERDES 16 /* number of PCIe serdes on the SBus */
extern const u8 pcie_serdes_broadcast[];
extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES];
+extern uint platform_config_load;
+
/* SBus commands */
#define RESET_SBUS_RECEIVER 0x20
#define WRITE_SBUS_RECEIVER 0x21
@@ -629,6 +640,42 @@ int load_firmware(struct hfi1_devdata *dd);
void dispose_firmware(void);
int acquire_hw_mutex(struct hfi1_devdata *dd);
void release_hw_mutex(struct hfi1_devdata *dd);
+
+/*
+ * Bitmask of dynamic access for ASIC block chip resources. Each HFI has its
+ * own range of bits for the resource so it can clear its own bits on
+ * starting and exiting. If either HFI has the resource bit set, the
+ * resource is in use. The separate bit ranges are:
+ * HFI0 bits 7:0
+ * HFI1 bits 15:8
+ */
+#define CR_SBUS 0x01 /* SBUS, THERM, and PCIE registers */
+#define CR_EPROM 0x02 /* EEP, GPIO registers */
+#define CR_I2C1 0x04 /* QSFP1_OE register */
+#define CR_I2C2 0x08 /* QSFP2_OE register */
+#define CR_DYN_SHIFT 8 /* dynamic flag shift */
+#define CR_DYN_MASK ((1ull << CR_DYN_SHIFT) - 1)
+
+/*
+ * Bitmask of static ASIC states these are outside of the dynamic ASIC
+ * block chip resources above. These are to be set once and never cleared.
+ * Must be holding the SBus dynamic flag when setting.
+ */
+#define CR_THERM_INIT 0x010000
+
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait);
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource);
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+ const char *func);
+void init_chip_resources(struct hfi1_devdata *dd);
+void finish_chip_resources(struct hfi1_devdata *dd);
+
+/* ms wait time for access to an SBus resoure */
+#define SBUS_TIMEOUT 4000 /* long enough for a FW download and SBR */
+
+/* ms wait time for a qsfp (i2c) chain to become available */
+#define QSFP_WAIT 20000 /* long enough for FW update to the F4 uc */
+
void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
@@ -644,13 +691,17 @@ void handle_verify_cap(struct work_struct *work);
void handle_freeze(struct work_struct *work);
void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
+void handle_8051_request(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
+void reset_qsfp(struct hfi1_pportdata *ppd);
+void qsfp_event(struct work_struct *work);
void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
int send_idle_sma(struct hfi1_devdata *dd, u64 message);
+int load_8051_config(struct hfi1_devdata *, u8, u8, u32);
+int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *);
int start_link(struct hfi1_pportdata *ppd);
-void init_qsfp(struct hfi1_pportdata *ppd);
int bringup_serdes(struct hfi1_pportdata *ppd);
void set_intr_state(struct hfi1_devdata *dd, u32 enable);
void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
@@ -690,6 +741,8 @@ u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl);
u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data);
u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl);
u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data);
+u32 read_logical_state(struct hfi1_devdata *dd);
+void force_recv_intr(struct hfi1_ctxtdata *rcd);
/* Per VL indexes */
enum {
@@ -785,8 +838,14 @@ enum {
C_SW_CPU_RCV_LIM,
C_SW_VTX_WAIT,
C_SW_PIO_WAIT,
+ C_SW_PIO_DRAIN,
C_SW_KMEM_WAIT,
C_SW_SEND_SCHED,
+ C_SDMA_DESC_FETCHED_CNT,
+ C_SDMA_INT_CNT,
+ C_SDMA_ERR_CNT,
+ C_SDMA_IDLE_INT_CNT,
+ C_SDMA_PROGRESS_INT_CNT,
/* MISC_ERR_STATUS */
C_MISC_PLL_LOCK_FAIL_ERR,
C_MISC_MBIST_FAIL_ERR,
@@ -1275,10 +1334,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
- u64 **cntrp);
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
- char **namep, u64 **cntrp);
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h
index 014d7a609ea0..770f05c9b8de 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/staging/rdma/hfi1/chip_registers.h
@@ -2,14 +2,13 @@
#define DEF_CHIP_REG
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -21,8 +20,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -1281,6 +1278,9 @@
#define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT 0
#define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK 0xFFFFull
#define PCIE_CFG_REG_PL2 (PCIE + 0x000000000708)
+#define PCIE_CFG_REG_PL3 (PCIE + 0x00000000070C)
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT 27
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK 0x38000000
#define PCIE_CFG_REG_PL102 (PCIE + 0x000000000898)
#define PCIE_CFG_REG_PL102_GEN3_EQ_POST_CURSOR_PSET_SHIFT 12
#define PCIE_CFG_REG_PL102_GEN3_EQ_CURSOR_PSET_SHIFT 6
@@ -1301,5 +1301,6 @@
#define CCE_INT_BLOCKED (CCE + 0x000000110C00)
#define SEND_DMA_IDLE_CNT (TXE + 0x000000200040)
#define SEND_DMA_DESC_FETCHED_CNT (TXE + 0x000000200058)
+#define CCE_MSIX_PBA_OFFSET 0X0110000
#endif /* DEF_CHIP_REG */
diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h
index 5dd92720faae..e9b6bb322025 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/staging/rdma/hfi1/common.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -341,19 +338,16 @@ struct hfi1_message_header {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_PERMISSIVE_LID 0xFFFF
#define HFI1_AETH_CREDIT_SHIFT 24
#define HFI1_AETH_CREDIT_MASK 0x1F
#define HFI1_AETH_CREDIT_INVAL 0x1F
#define HFI1_MSN_MASK 0xFFFFFF
-#define HFI1_QPN_MASK 0xFFFFFF
#define HFI1_FECN_SHIFT 31
#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK (1 << HFI1_FECN_SHIFT)
+#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
#define HFI1_BECN_SHIFT 30
#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK (1 << HFI1_BECN_SHIFT)
-#define HFI1_MULTICAST_LID_BASE 0xC000
+#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c
index acd2269e9f14..dbab9d9cc288 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/staging/rdma/hfi1/debugfs.c
@@ -1,13 +1,12 @@
#ifdef CONFIG_DEBUG_FS
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -19,8 +18,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -52,6 +49,7 @@
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/export.h>
+#include <linux/module.h>
#include "hfi.h"
#include "debugfs.h"
@@ -71,6 +69,7 @@ static const struct seq_operations _##name##_seq_ops = { \
.stop = _##name##_seq_stop, \
.show = _##name##_seq_show \
}
+
#define DEBUGFS_SEQ_FILE_OPEN(name) \
static int _##name##_open(struct inode *inode, struct file *s) \
{ \
@@ -102,7 +101,6 @@ do { \
pr_warn("create of %s failed\n", name); \
} while (0)
-
#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
@@ -127,7 +125,6 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
}
-
static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
__releases(RCU)
{
@@ -151,8 +148,8 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
if (!n_packets && !n_bytes)
return SEQ_SKIP;
seq_printf(s, "%02llx %llu/%llu\n", i,
- (unsigned long long) n_packets,
- (unsigned long long) n_bytes);
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes);
return 0;
}
@@ -247,7 +244,7 @@ __acquires(RCU)
}
static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
- loff_t *pos)
+ loff_t *pos)
{
struct qp_iter *iter = iter_ptr;
@@ -308,7 +305,6 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
}
-
static void _sdes_seq_stop(struct seq_file *s, void *v)
__releases(RCU)
{
@@ -341,7 +337,7 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
rcu_read_lock();
dd = private2dd(file);
- avail = hfi1_read_cntrs(dd, *ppos, NULL, &counters);
+ avail = hfi1_read_cntrs(dd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
rcu_read_unlock();
return rval;
@@ -358,7 +354,7 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
rcu_read_lock();
dd = private2dd(file);
- avail = hfi1_read_cntrs(dd, *ppos, &names, NULL);
+ avail = hfi1_read_cntrs(dd, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
rcu_read_unlock();
return rval;
@@ -385,8 +381,7 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
rcu_read_lock();
dd = private2dd(file);
- /* port number n/a here since names are constant */
- avail = hfi1_read_portcntrs(dd, *ppos, 0, &names, NULL);
+ avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
rcu_read_unlock();
return rval;
@@ -394,28 +389,150 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
/* read the per-port counters */
static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
u64 *counters;
size_t avail;
- struct hfi1_devdata *dd;
struct hfi1_pportdata *ppd;
ssize_t rval;
rcu_read_lock();
ppd = private2ppd(file);
- dd = ppd->dd;
- avail = hfi1_read_portcntrs(dd, *ppos, ppd->port - 1, NULL, &counters);
+ avail = hfi1_read_portcntrs(ppd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
rcu_read_unlock();
return rval;
}
+static void check_dyn_flag(u64 scratch0, char *p, int size, int *used,
+ int this_hfi, int hfi, u32 flag, const char *what)
+{
+ u32 mask;
+
+ mask = flag << (hfi ? CR_DYN_SHIFT : 0);
+ if (scratch0 & mask) {
+ *used += scnprintf(p + *used, size - *used,
+ " 0x%08x - HFI%d %s in use, %s device\n",
+ mask, hfi, what,
+ this_hfi == hfi ? "this" : "other");
+ }
+}
+
+static ssize_t asic_flags_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
+ u64 scratch0;
+ char *tmp;
+ int ret = 0;
+ int size;
+ int used;
+ int i;
+
+ rcu_read_lock();
+ ppd = private2ppd(file);
+ dd = ppd->dd;
+ size = PAGE_SIZE;
+ used = 0;
+ tmp = kmalloc(size, GFP_KERNEL);
+ if (!tmp) {
+ rcu_read_unlock();
+ return -ENOMEM;
+ }
+
+ scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+ used += scnprintf(tmp + used, size - used,
+ "Resource flags: 0x%016llx\n", scratch0);
+
+ /* check permanent flag */
+ if (scratch0 & CR_THERM_INIT) {
+ used += scnprintf(tmp + used, size - used,
+ " 0x%08x - thermal monitoring initialized\n",
+ (u32)CR_THERM_INIT);
+ }
+
+ /* check each dynamic flag on each HFI */
+ for (i = 0; i < 2; i++) {
+ check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+ CR_SBUS, "SBus");
+ check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+ CR_EPROM, "EPROM");
+ check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+ CR_I2C1, "i2c chain 1");
+ check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+ CR_I2C2, "i2c chain 2");
+ }
+ used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
+
+ ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
+ rcu_read_unlock();
+ kfree(tmp);
+ return ret;
+}
+
+static ssize_t asic_flags_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
+ char *buff;
+ int ret;
+ unsigned long long value;
+ u64 scratch0;
+ u64 clear;
+
+ rcu_read_lock();
+ ppd = private2ppd(file);
+ dd = ppd->dd;
+
+ buff = kmalloc(count + 1, GFP_KERNEL);
+ if (!buff) {
+ ret = -ENOMEM;
+ goto do_return;
+ }
+
+ ret = copy_from_user(buff, buf, count);
+ if (ret > 0) {
+ ret = -EFAULT;
+ goto do_free;
+ }
+
+ /* zero terminate and read the expected integer */
+ buff[count] = 0;
+ ret = kstrtoull(buff, 0, &value);
+ if (ret)
+ goto do_free;
+ clear = value;
+
+ /* obtain exclusive access */
+ mutex_lock(&dd->asic_data->asic_resource_mutex);
+ acquire_hw_mutex(dd);
+
+ scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+ scratch0 &= ~clear;
+ write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+ /* force write to be visible to other HFI on another OS */
+ (void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+ release_hw_mutex(dd);
+ mutex_unlock(&dd->asic_data->asic_resource_mutex);
+
+ /* return the number of bytes written */
+ ret = count;
+
+ do_free:
+ kfree(buff);
+ do_return:
+ rcu_read_unlock();
+ return ret;
+}
+
/*
* read the per-port QSFP data for ppd
*/
static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
struct hfi1_pportdata *ppd;
char *tmp;
@@ -439,7 +556,7 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
/* Do an i2c write operation on the chain for the given HFI. */
static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, u32 target)
+ size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
@@ -451,6 +568,16 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
rcu_read_lock();
ppd = private2ppd(file);
+ /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+ i2c_addr = (*ppos >> 16) & 0xffff;
+ offset = *ppos & 0xffff;
+
+ /* explicitly reject invalid address 0 to catch cp and cat */
+ if (i2c_addr == 0) {
+ ret = -EINVAL;
+ goto _return;
+ }
+
buff = kmalloc(count, GFP_KERNEL);
if (!buff) {
ret = -ENOMEM;
@@ -463,9 +590,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
goto _free;
}
- i2c_addr = (*ppos >> 16) & 0xff;
- offset = *ppos & 0xffff;
-
total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -485,21 +609,21 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
/* Do an i2c write operation on chain for HFI 0. */
static ssize_t i2c1_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __i2c_debugfs_write(file, buf, count, ppos, 0);
}
/* Do an i2c write operation on chain for HFI 1. */
static ssize_t i2c2_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __i2c_debugfs_write(file, buf, count, ppos, 1);
}
/* Do an i2c read operation on the chain for the given HFI. */
static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos, u32 target)
+ size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
@@ -511,15 +635,22 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
rcu_read_lock();
ppd = private2ppd(file);
+ /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+ i2c_addr = (*ppos >> 16) & 0xffff;
+ offset = *ppos & 0xffff;
+
+ /* explicitly reject invalid address 0 to catch cp and cat */
+ if (i2c_addr == 0) {
+ ret = -EINVAL;
+ goto _return;
+ }
+
buff = kmalloc(count, GFP_KERNEL);
if (!buff) {
ret = -ENOMEM;
goto _return;
}
- i2c_addr = (*ppos >> 16) & 0xff;
- offset = *ppos & 0xffff;
-
total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
if (total_read < 0) {
ret = total_read;
@@ -545,21 +676,21 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
/* Do an i2c read operation on chain for HFI 0. */
static ssize_t i2c1_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __i2c_debugfs_read(file, buf, count, ppos, 0);
}
/* Do an i2c read operation on chain for HFI 1. */
static ssize_t i2c2_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __i2c_debugfs_read(file, buf, count, ppos, 1);
}
/* Do a QSFP write operation on the i2c chain for the given HFI. */
static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, u32 target)
+ size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
@@ -605,21 +736,21 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
/* Do a QSFP write operation on i2c chain for HFI 0. */
static ssize_t qsfp1_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __qsfp_debugfs_write(file, buf, count, ppos, 0);
}
/* Do a QSFP write operation on i2c chain for HFI 1. */
static ssize_t qsfp2_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __qsfp_debugfs_write(file, buf, count, ppos, 1);
}
/* Do a QSFP read operation on the i2c chain for the given HFI. */
static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos, u32 target)
+ size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
@@ -665,18 +796,116 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
/* Do a QSFP read operation on i2c chain for HFI 0. */
static ssize_t qsfp1_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __qsfp_debugfs_read(file, buf, count, ppos, 0);
}
/* Do a QSFP read operation on i2c chain for HFI 1. */
static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
return __qsfp_debugfs_read(file, buf, count, ppos, 1);
}
+static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+ struct hfi1_pportdata *ppd;
+ int ret;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ ppd = private2ppd(fp);
+
+ ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+ if (ret) /* failed - release the module */
+ module_put(THIS_MODULE);
+
+ return ret;
+}
+
+static int i2c1_debugfs_open(struct inode *in, struct file *fp)
+{
+ return __i2c_debugfs_open(in, fp, 0);
+}
+
+static int i2c2_debugfs_open(struct inode *in, struct file *fp)
+{
+ return __i2c_debugfs_open(in, fp, 1);
+}
+
+static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+ struct hfi1_pportdata *ppd;
+
+ ppd = private2ppd(fp);
+
+ release_chip_resource(ppd->dd, i2c_target(target));
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+static int i2c1_debugfs_release(struct inode *in, struct file *fp)
+{
+ return __i2c_debugfs_release(in, fp, 0);
+}
+
+static int i2c2_debugfs_release(struct inode *in, struct file *fp)
+{
+ return __i2c_debugfs_release(in, fp, 1);
+}
+
+static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+ struct hfi1_pportdata *ppd;
+ int ret;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ ppd = private2ppd(fp);
+
+ ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+ if (ret) /* failed - release the module */
+ module_put(THIS_MODULE);
+
+ return ret;
+}
+
+static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
+{
+ return __qsfp_debugfs_open(in, fp, 0);
+}
+
+static int qsfp2_debugfs_open(struct inode *in, struct file *fp)
+{
+ return __qsfp_debugfs_open(in, fp, 1);
+}
+
+static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+ struct hfi1_pportdata *ppd;
+
+ ppd = private2ppd(fp);
+
+ release_chip_resource(ppd->dd, i2c_target(target));
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+static int qsfp1_debugfs_release(struct inode *in, struct file *fp)
+{
+ return __qsfp_debugfs_release(in, fp, 0);
+}
+
+static int qsfp2_debugfs_release(struct inode *in, struct file *fp)
+{
+ return __qsfp_debugfs_release(in, fp, 1);
+}
+
#define DEBUGFS_OPS(nm, readroutine, writeroutine) \
{ \
.name = nm, \
@@ -687,6 +916,18 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
}, \
}
+#define DEBUGFS_XOPS(nm, readf, writef, openf, releasef) \
+{ \
+ .name = nm, \
+ .ops = { \
+ .read = readf, \
+ .write = writef, \
+ .llseek = generic_file_llseek, \
+ .open = openf, \
+ .release = releasef \
+ }, \
+}
+
static const struct counter_info cntr_ops[] = {
DEBUGFS_OPS("counter_names", dev_names_read, NULL),
DEBUGFS_OPS("counters", dev_counters_read, NULL),
@@ -695,11 +936,16 @@ static const struct counter_info cntr_ops[] = {
static const struct counter_info port_cntr_ops[] = {
DEBUGFS_OPS("port%dcounters", portcntrs_debugfs_read, NULL),
- DEBUGFS_OPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write),
- DEBUGFS_OPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write),
+ DEBUGFS_XOPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write,
+ i2c1_debugfs_open, i2c1_debugfs_release),
+ DEBUGFS_XOPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write,
+ i2c2_debugfs_open, i2c2_debugfs_release),
DEBUGFS_OPS("qsfp_dump%d", qsfp_debugfs_dump, NULL),
- DEBUGFS_OPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write),
- DEBUGFS_OPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write),
+ DEBUGFS_XOPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write,
+ qsfp1_debugfs_open, qsfp1_debugfs_release),
+ DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
+ qsfp2_debugfs_open, qsfp2_debugfs_release),
+ DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
};
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
@@ -747,8 +993,8 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
ibd->hfi1_ibdev_dbg,
ppd,
&port_cntr_ops[i].ops,
- port_cntr_ops[i].ops.write == NULL ?
- S_IRUGO : S_IRUGO|S_IWUSR);
+ !port_cntr_ops[i].ops.write ?
+ S_IRUGO : S_IRUGO | S_IWUSR);
}
}
diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/staging/rdma/hfi1/debugfs.h
index 92d6fe146714..b6fb6814f1b8 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/staging/rdma/hfi1/debugfs.h
@@ -1,14 +1,13 @@
#ifndef _HFI1_DEBUGFS_H
#define _HFI1_DEBUGFS_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c
index 58472e5ac4e5..c05c39da83b1 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/staging/rdma/hfi1/device.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h
index 2850ff739d81..5bb3e83cf2da 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/staging/rdma/hfi1/device.h
@@ -1,14 +1,13 @@
#ifndef _HFI1_DEVICE_H
#define _HFI1_DEVICE_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
index e41159fe6889..c5b520bf610e 100644
--- a/drivers/staging/rdma/hfi1/diag.c
+++ b/drivers/staging/rdma/hfi1/diag.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -70,6 +67,7 @@
#include "hfi.h"
#include "device.h"
#include "common.h"
+#include "verbs_txreq.h"
#include "trace.h"
#undef pr_fmt
@@ -80,15 +78,15 @@
/* Snoop option mask */
#define SNOOP_DROP_SEND BIT(0)
#define SNOOP_USE_METADATA BIT(1)
+#define SNOOP_SET_VL0TOVL15 BIT(2)
static u8 snoop_flags;
/*
* Extract packet length from LRH header.
- * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the
- * bogus bits away. This is in Dwords so multiply by 4 to get size in bytes
+ * This is in Dwords so multiply by 4 to get size in bytes
*/
-#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
+#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
enum hfi1_filter_status {
HFI1_FILTER_HIT,
@@ -860,7 +858,7 @@ static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
vl = sc4;
} else {
sl = (byte_two >> 4) & 0xf;
- ibp = to_iport(&dd->verbs_dev.ibdev, 1);
+ ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
sc5 = ibp->sl_to_sc[sl];
vl = sc_to_vlt(dd, sc5);
if (vl != sc4) {
@@ -966,6 +964,65 @@ static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
return ret;
}
+/**
+ * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
+ * @ppd : ptr to hfi1 port data
+ * @value : options from user space
+ *
+ * Assumes the rest of the CM credit registers are zero from a
+ * previous global or credit reset.
+ * Leave shared count at zero for both global and all vls.
+ * In snoop mode ideally we don't use shared credits
+ * Reserve 8.5k for VL15
+ * If total credits less than 8.5kbytes return error.
+ * Divide the rest of the credits across VL0 to VL7 and if
+ * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
+ * return with an error.
+ * The credit registers will be reset to zero on link negotiation or link up
+ * so this function should be activated from user space only if the port has
+ * gone past link negotiation and link up.
+ *
+ * Return -- 0 if successful else error condition
+ *
+ */
+static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
+ int value)
+{
+#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */
+ struct buffer_control t;
+ int i;
+ struct hfi1_devdata *dd = ppd->dd;
+ u16 total_credits = (value >> 16) & 0xffff;
+ u16 vl15_credits = dd->vl15_init / 2;
+ u16 per_vl_credits;
+ __be16 be_per_vl_credits;
+
+ if (!(ppd->host_link_state & HLS_UP))
+ goto err_exit;
+ if (total_credits < vl15_credits)
+ goto err_exit;
+
+ per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
+
+ if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
+ goto err_exit;
+
+ memset(&t, 0, sizeof(t));
+ be_per_vl_credits = cpu_to_be16(per_vl_credits);
+
+ for (i = 0; i < TXE_NUM_DATA_VL; i++)
+ t.vl[i].dedicated = be_per_vl_credits;
+
+ t.vl[15].dedicated = cpu_to_be16(vl15_credits);
+ return set_buffer_control(ppd, &t);
+
+err_exit:
+ snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
+ ppd->host_link_state, total_credits, vl15_credits);
+
+ return -EINVAL;
+}
+
static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
{
struct hfi1_devdata *dd;
@@ -1192,6 +1249,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
snoop_flags |= SNOOP_DROP_SEND;
if (value & SNOOP_USE_METADATA)
snoop_flags |= SNOOP_USE_METADATA;
+ if (value & (SNOOP_SET_VL0TOVL15)) {
+ ppd = &dd->pport[0]; /* first port will do */
+ ret = hfi1_assign_snoop_link_credits(ppd, value);
+ }
break;
default:
return -ENOTTY;
@@ -1603,7 +1664,7 @@ int snoop_recv_handler(struct hfi1_packet *packet)
/*
* Handle snooping and capturing packets when sdma is being used.
*/
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
@@ -1616,20 +1677,19 @@ int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
* bypass packets. The only way to send a bypass packet currently is to use the
* diagpkt interface. When that interface is enable snoop/capture is not.
*/
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
- struct ahg_ib_header *ahdr = qp->s_hdr;
u32 hdrwords = qp->s_hdrwords;
- struct hfi1_sge_state *ss = qp->s_cur_sge;
+ struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
u32 dwords = (len + 3) >> 2;
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
struct snoop_packet *s_packet = NULL;
- u32 *hdr = (u32 *)&ahdr->ibh;
+ u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
u32 length = 0;
- struct hfi1_sge_state temp_ss;
+ struct rvt_sge_state temp_ss;
void *data = NULL;
void *data_start = NULL;
int ret;
@@ -1638,7 +1698,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
struct capture_md md;
u32 vl;
u32 hdr_len = hdrwords << 2;
- u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh);
+ u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
md.u.pbc = 0;
@@ -1665,7 +1725,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
md.port = 1;
md.dir = PKT_DIR_EGRESS;
if (likely(pbc == 0)) {
- vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12;
+ vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
} else {
md.u.pbc = 0;
@@ -1727,7 +1787,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
ret = HFI1_FILTER_HIT;
} else {
ret = ppd->dd->hfi1_snoop.filter_callback(
- &ahdr->ibh,
+ &ps->s_txreq->phdr.hdr,
NULL,
ppd->dd->hfi1_snoop.filter_value);
}
@@ -1759,9 +1819,16 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_rc_send_complete(qp, &ahdr->ibh);
+ hfi1_rc_send_complete(qp,
+ &ps->s_txreq->phdr.hdr);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
+
+ /*
+ * If snoop is dropping the packet we need to put the
+ * txreq back because no one else will.
+ */
+ hfi1_put_txreq(ps->s_txreq);
return 0;
}
break;
diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/staging/rdma/hfi1/dma.c
index e03bd735173c..7e8dab892848 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/staging/rdma/hfi1/dma.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -52,7 +49,7 @@
#include "verbs.h"
-#define BAD_DMA_ADDRESS ((u64) 0)
+#define BAD_DMA_ADDRESS ((u64)0)
/*
* The following functions implement driver specific replacements
@@ -74,7 +71,7 @@ static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
if (WARN_ON(!valid_dma_direction(direction)))
return BAD_DMA_ADDRESS;
- return (u64) cpu_addr;
+ return (u64)cpu_addr;
}
static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
@@ -95,7 +92,7 @@ static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
if (offset + size > PAGE_SIZE)
return BAD_DMA_ADDRESS;
- addr = (u64) page_address(page);
+ addr = (u64)page_address(page);
if (addr)
addr += offset;
@@ -120,7 +117,7 @@ static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
return BAD_DMA_ADDRESS;
for_each_sg(sgl, sg, nents, i) {
- addr = (u64) page_address(sg_page(sg));
+ addr = (u64)page_address(sg_page(sg));
if (!addr) {
ret = 0;
break;
@@ -161,14 +158,14 @@ static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
if (p)
addr = page_address(p);
if (dma_handle)
- *dma_handle = (u64) addr;
+ *dma_handle = (u64)addr;
return addr;
}
static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
void *cpu_addr, u64 dma_handle)
{
- free_pages((unsigned long) cpu_addr, get_order(size));
+ free_pages((unsigned long)cpu_addr, get_order(size));
}
struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c
index ee50bbf64d39..34511e5df1d5 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -56,6 +53,7 @@
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/prefetch.h>
+#include <rdma/ib_verbs.h>
#include "hfi.h"
#include "trace.h"
@@ -162,6 +160,22 @@ const char *get_unit_name(int unit)
return iname;
}
+const char *get_card_name(struct rvt_dev_info *rdi)
+{
+ struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+ struct hfi1_devdata *dd = container_of(ibdev,
+ struct hfi1_devdata, verbs_dev);
+ return get_unit_name(dd->unit);
+}
+
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
+{
+ struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+ struct hfi1_devdata *dd = container_of(ibdev,
+ struct hfi1_devdata, verbs_dev);
+ return dd->pcidev;
+}
+
/*
* Return count of units with at least one port ACTIVE.
*/
@@ -265,6 +279,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct hfi1_devdata *dd = ppd->dd;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
return;
@@ -283,9 +299,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
goto drop;
/* Check for GRH */
- if (lnh == HFI1_LRH_BTH)
+ if (lnh == HFI1_LRH_BTH) {
ohdr = &hdr->u.oth;
- else if (lnh == HFI1_LRH_GRH) {
+ } else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
ohdr = &hdr->u.l.oth;
@@ -295,17 +311,17 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
rcv_flags |= HFI1_HAS_GRH;
- } else
+ } else {
goto drop;
-
+ }
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
- if (lid < HFI1_MULTICAST_LID_BASE) {
- struct hfi1_qp *qp;
+ qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+ if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ struct rvt_qp *qp;
unsigned long flags;
rcu_read_lock();
- qp = hfi1_lookup_qpn(ibp, qp_num);
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!qp) {
rcu_read_unlock();
goto drop;
@@ -318,9 +334,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
spin_lock_irqsave(&qp->r_lock, flags);
/* Check for valid receive state. */
- if (!(ib_hfi1_state_ops[qp->state] &
- HFI1_PROCESS_RECV_OK)) {
- ibp->n_pkt_drops++;
+ if (!(ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_RECV_OK)) {
+ ibp->rvp.n_pkt_drops++;
}
switch (qp->ibqp.qp_type) {
@@ -352,7 +368,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (rhf_use_egr_bfr(packet->rhf))
ebuf = packet->ebuf;
- if (ebuf == NULL)
+ if (!ebuf)
goto drop; /* this should never happen */
if (lnh == HFI1_LRH_BTH)
@@ -370,7 +386,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
* Only in pre-B0 h/w is the CNP_OPCODE handled
* via this code path.
*/
- struct hfi1_qp *qp = NULL;
+ struct rvt_qp *qp = NULL;
u32 lqpn, rqpn;
u16 rlid;
u8 svc_type, sl, sc5;
@@ -380,10 +396,10 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
sc5 |= 0x10;
sl = ibp->sc_to_sl[sc5];
- lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK;
+ lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
rcu_read_lock();
- qp = hfi1_lookup_qpn(ibp, lqpn);
- if (qp == NULL) {
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
+ if (!qp) {
rcu_read_unlock();
goto drop;
}
@@ -419,9 +435,8 @@ drop:
}
static inline void init_packet(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet)
+ struct hfi1_packet *packet)
{
-
packet->rsize = rcd->rcvhdrqentsize; /* words */
packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
packet->rcd = rcd;
@@ -434,12 +449,7 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->rcv_flags = 0;
}
-#ifndef CONFIG_PRESCAN_RXQ
-static void prescan_rxq(struct hfi1_packet *packet) {}
-#else /* !CONFIG_PRESCAN_RXQ */
-static int prescan_receive_queue;
-
-static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
+static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
struct hfi1_other_headers *ohdr,
u64 rhf, u32 bth1, struct ib_grh *grh)
{
@@ -453,7 +463,7 @@ static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
case IB_QPT_GSI:
case IB_QPT_UD:
rlid = be16_to_cpu(hdr->lrh[3]);
- rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+ rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
@@ -483,7 +493,7 @@ static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
if (bth1 & HFI1_BECN_SMASK) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 lqpn = bth1 & HFI1_QPN_MASK;
+ u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc5];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
@@ -562,26 +572,31 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
* containing Excplicit Congestion Notifications (FECNs, or BECNs).
* When an ECN is found, process the Congestion Notification, and toggle
* it off.
+ * This is declared as a macro to allow quick checking of the port to avoid
+ * the overhead of a function call if not enabled.
*/
-static void prescan_rxq(struct hfi1_packet *packet)
+#define prescan_rxq(rcd, packet) \
+ do { \
+ if (rcd->ppd->cc_prescan) \
+ __prescan_rxq(packet); \
+ } while (0)
+static void __prescan_rxq(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ps_mdata mdata;
- if (!prescan_receive_queue)
- return;
-
init_ps_mdata(&mdata, packet);
while (1) {
struct hfi1_devdata *dd = rcd->dd;
struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
- __le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head +
+ __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
- struct hfi1_qp *qp;
+ struct rvt_qp *qp;
struct hfi1_ib_header *hdr;
struct hfi1_other_headers *ohdr;
struct ib_grh *grh = NULL;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
int is_ecn = 0;
@@ -600,25 +615,25 @@ static void prescan_rxq(struct hfi1_packet *packet)
hfi1_get_msgheader(dd, rhf_addr);
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_BTH)
+ if (lnh == HFI1_LRH_BTH) {
ohdr = &hdr->u.oth;
- else if (lnh == HFI1_LRH_GRH) {
+ } else if (lnh == HFI1_LRH_GRH) {
ohdr = &hdr->u.l.oth;
grh = &hdr->u.l.grh;
- } else
+ } else {
goto next; /* just in case */
-
+ }
bth1 = be32_to_cpu(ohdr->bth[1]);
is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
if (!is_ecn)
goto next;
- qpn = bth1 & HFI1_QPN_MASK;
+ qpn = bth1 & RVT_QPN_MASK;
rcu_read_lock();
- qp = hfi1_lookup_qpn(ibp, qpn);
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
- if (qp == NULL) {
+ if (!qp) {
rcu_read_unlock();
goto next;
}
@@ -633,7 +648,6 @@ next:
update_ps_mdata(&mdata, rcd);
}
}
-#endif /* CONFIG_PRESCAN_RXQ */
static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
{
@@ -683,8 +697,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
* The +2 is the size of the RHF.
*/
prefetch_range(packet->ebuf,
- packet->tlen - ((packet->rcd->rcvhdrqentsize -
- (rhf_hdrq_offset(packet->rhf)+2)) * 4));
+ packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ (rhf_hdrq_offset(packet->rhf)
+ + 2)) * 4));
}
/*
@@ -712,7 +727,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
}
}
- packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
+ packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
packet->rcd->dd->rhf_offset;
packet->rhf = rhf_to_cpu(packet->rhf_addr);
@@ -737,7 +752,6 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
static inline void finish_packet(struct hfi1_packet *packet)
{
-
/*
* Nothing we need to free for the packet.
*
@@ -746,14 +760,12 @@ static inline void finish_packet(struct hfi1_packet *packet)
*/
update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
packet->etail, rcv_intr_dynamic, packet->numpkt);
-
}
static inline void process_rcv_qp_work(struct hfi1_packet *packet)
{
-
struct hfi1_ctxtdata *rcd;
- struct hfi1_qp *qp, *nqp;
+ struct rvt_qp *qp, *nqp;
rcd = packet->rcd;
rcd->head = packet->rhqoff;
@@ -764,17 +776,17 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
*/
list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
list_del_init(&qp->rspwait);
- if (qp->r_flags & HFI1_R_RSP_DEFERED_ACK) {
- qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+ if (qp->r_flags & RVT_R_RSP_NAK) {
+ qp->r_flags &= ~RVT_R_RSP_NAK;
hfi1_send_rc_ack(rcd, qp, 0);
}
- if (qp->r_flags & HFI1_R_RSP_SEND) {
+ if (qp->r_flags & RVT_R_RSP_SEND) {
unsigned long flags;
- qp->r_flags &= ~HFI1_R_RSP_SEND;
+ qp->r_flags &= ~RVT_R_RSP_SEND;
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_hfi1_state_ops[qp->state] &
- HFI1_PROCESS_OR_FLUSH_SEND)
+ if (ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_OR_FLUSH_SEND)
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -799,7 +811,7 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
goto bail;
}
- prescan_rxq(&packet);
+ prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
@@ -830,7 +842,7 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
}
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
- prescan_rxq(&packet);
+ prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
@@ -862,6 +874,37 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
&handle_receive_interrupt_dma_rtail;
}
+void set_all_slowpath(struct hfi1_devdata *dd)
+{
+ int i;
+
+ /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+}
+
+static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
+ struct hfi1_packet packet,
+ struct hfi1_devdata *dd)
+{
+ struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
+ struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
+ packet.rhf_addr);
+
+ if (hdr2sc(hdr, packet.rhf) != 0xf) {
+ int hwstate = read_logical_state(dd);
+
+ if (hwstate != LSTATE_ACTIVE) {
+ dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+ return 0;
+ }
+
+ queue_work(rcd->ppd->hfi1_wq, lsaw);
+ return 1;
+ }
+ return 0;
+}
+
/*
* handle_receive_interrupt - receive a packet
* @rcd: the context
@@ -910,17 +953,17 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
}
}
- prescan_rxq(&packet);
+ prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
-
- if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
- DROP_PACKET_OFF) == DROP_PACKET_ON)) {
+ if (unlikely(dd->do_drop &&
+ atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+ DROP_PACKET_ON)) {
dd->do_drop = 0;
/* On to the next packet */
packet.rhqoff += packet.rsize;
- packet.rhf_addr = (__le32 *) rcd->rcvhdrq +
+ packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
packet.rhqoff +
dd->rhf_offset;
packet.rhf = rhf_to_cpu(packet.rhf_addr);
@@ -929,6 +972,11 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = skip_rcv_packet(&packet, thread);
skip_pkt = 0;
} else {
+ /* Auto activate link on non-SC15 packet receive */
+ if (unlikely(rcd->ppd->host_link_state ==
+ HLS_UP_ARMED) &&
+ set_armed_to_active(rcd, packet, dd))
+ goto bail;
last = process_rcv_packet(&packet, thread);
}
@@ -940,8 +988,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
if (seq != rcd->seq_cnt)
last = RCV_PKT_DONE;
if (needset) {
- dd_dev_info(dd,
- "Switching to NO_DMA_RTAIL\n");
+ dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
set_all_nodma_rtail(dd);
needset = 0;
}
@@ -984,6 +1031,42 @@ bail:
}
/*
+ * We may discover in the interrupt that the hardware link state has
+ * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
+ * and we need to update the driver's notion of the link state. We cannot
+ * run set_link_state from interrupt context, so we queue this function on
+ * a workqueue.
+ *
+ * We delay the regular interrupt processing until after the state changes
+ * so that the link will be in the correct state by the time any application
+ * we wake up attempts to send a reply to any message it received.
+ * (Subsequent receive interrupts may possibly force the wakeup before we
+ * update the link state.)
+ *
+ * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
+ * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
+ * so we're safe from use-after-free of the rcd.
+ */
+void receive_interrupt_work(struct work_struct *work)
+{
+ struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+ linkstate_active_work);
+ struct hfi1_devdata *dd = ppd->dd;
+ int i;
+
+ /* Received non-SC15 packet implies neighbor_normal */
+ ppd->neighbor_normal = 1;
+ set_link_state(ppd, HLS_UP_ACTIVE);
+
+ /*
+ * Interrupt all kernel contexts that could have had an
+ * interrupt during auto activation.
+ */
+ for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+ force_recv_intr(dd->rcd[i]);
+}
+
+/*
* Convert a given MTU size to the on-wire MAD packet enumeration.
* Return -1 if the size is invalid.
*/
@@ -1037,9 +1120,9 @@ int set_mtu(struct hfi1_pportdata *ppd)
ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
mutex_lock(&ppd->hls_lock);
- if (ppd->host_link_state == HLS_UP_INIT
- || ppd->host_link_state == HLS_UP_ARMED
- || ppd->host_link_state == HLS_UP_ACTIVE)
+ if (ppd->host_link_state == HLS_UP_INIT ||
+ ppd->host_link_state == HLS_UP_ARMED ||
+ ppd->host_link_state == HLS_UP_ACTIVE)
is_up = 1;
drain = !is_ax(dd) && is_up;
@@ -1082,79 +1165,80 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
return 0;
}
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDs, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
+void shutdown_led_override(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * This pairs with the memory barrier in hfi1_start_led_override to
+ * ensure that we read the correct state of LED beaconing represented
+ * by led_override_timer_active
+ */
+ smp_rmb();
+ if (atomic_read(&ppd->led_override_timer_active)) {
+ del_timer_sync(&ppd->led_override_timer);
+ atomic_set(&ppd->led_override_timer_active, 0);
+ /* Ensure the atomic_set is visible to all CPUs */
+ smp_wmb();
+ }
+
+ /* Hand control of the LED to the DC for normal operation */
+ write_csr(dd, DCC_CFG_LED_CNTRL, 0);
+}
static void run_led_override(unsigned long opaque)
{
struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
struct hfi1_devdata *dd = ppd->dd;
- int timeoff;
- int ph_idx;
+ unsigned long timeout;
+ int phase_idx;
if (!(dd->flags & HFI1_INITTED))
return;
- ph_idx = ppd->led_override_phase++ & 1;
- ppd->led_override = ppd->led_override_vals[ph_idx];
- timeoff = ppd->led_override_timeoff;
+ phase_idx = ppd->led_override_phase & 1;
- /*
- * don't re-fire the timer if user asked for it to be off; we let
- * it fire one more time after they turn it off to simplify
- */
- if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
- mod_timer(&ppd->led_override_timer, jiffies + timeoff);
+ setextled(dd, phase_idx);
+
+ timeout = ppd->led_override_vals[phase_idx];
+
+ /* Set up for next phase */
+ ppd->led_override_phase = !ppd->led_override_phase;
+
+ mod_timer(&ppd->led_override_timer, jiffies + timeout);
}
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val)
+/*
+ * To have the LED blink in a particular pattern, provide timeon and timeoff
+ * in milliseconds.
+ * To turn off custom blinking and return to normal operation, use
+ * shutdown_led_override()
+ */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+ unsigned int timeoff)
{
- struct hfi1_devdata *dd = ppd->dd;
- int timeoff, freq;
-
- if (!(dd->flags & HFI1_INITTED))
+ if (!(ppd->dd->flags & HFI1_INITTED))
return;
- /* First check if we are blinking. If not, use 1HZ polling */
- timeoff = HZ;
- freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+ /* Convert to jiffies for direct use in timer */
+ ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
+ ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
- if (freq) {
- /* For blink, set each phase from one nybble of val */
- ppd->led_override_vals[0] = val & 0xF;
- ppd->led_override_vals[1] = (val >> 4) & 0xF;
- timeoff = (HZ << 4)/freq;
- } else {
- /* Non-blink set both phases the same. */
- ppd->led_override_vals[0] = val & 0xF;
- ppd->led_override_vals[1] = val & 0xF;
- }
- ppd->led_override_timeoff = timeoff;
+ /* Arbitrarily start from LED on phase */
+ ppd->led_override_phase = 1;
/*
* If the timer has not already been started, do so. Use a "quick"
- * timeout so the function will be called soon, to look at our request.
+ * timeout so the handler will be called soon to look at our request.
*/
- if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
- /* Need to start timer */
+ if (!timer_pending(&ppd->led_override_timer)) {
setup_timer(&ppd->led_override_timer, run_led_override,
- (unsigned long)ppd);
-
+ (unsigned long)ppd);
ppd->led_override_timer.expires = jiffies + 1;
add_timer(&ppd->led_override_timer);
- } else {
- if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
- mod_timer(&ppd->led_override_timer, jiffies + 1);
- atomic_dec(&ppd->led_override_timer_active);
+ atomic_set(&ppd->led_override_timer_active, 1);
+ /* Ensure the atomic_set is visible to all CPUs */
+ smp_wmb();
}
}
@@ -1184,8 +1268,8 @@ int hfi1_reset_device(int unit)
if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
dd_dev_info(dd,
- "Invalid unit number %u or not initialized or not present\n",
- unit);
+ "Invalid unit number %u or not initialized or not present\n",
+ unit);
ret = -ENXIO;
goto bail;
}
@@ -1203,14 +1287,8 @@ int hfi1_reset_device(int unit)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
- if (atomic_read(&ppd->led_override_timer_active)) {
- /* Need to stop LED timer, _then_ shut off LEDs */
- del_timer_sync(&ppd->led_override_timer);
- atomic_set(&ppd->led_override_timer_active, 0);
- }
- /* Shut off LEDs after we are sure timer is not running */
- ppd->led_override = LED_OVER_BOTH_OFF;
+ shutdown_led_override(ppd);
}
if (dd->flags & HFI1_HAS_SEND_DMA)
sdma_exit(dd);
@@ -1221,11 +1299,11 @@ int hfi1_reset_device(int unit)
if (ret)
dd_dev_err(dd,
- "Reinitialize unit %u after reset failed with %d\n",
- unit, ret);
+ "Reinitialize unit %u after reset failed with %d\n",
+ unit, ret);
else
dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
- unit);
+ unit);
bail:
return ret;
@@ -1282,7 +1360,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
handle_eflags(packet);
dd_dev_err(packet->rcd->dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
+ "Bypass packets are not supported in normal operation. Dropping\n");
return RHF_RCV_CONTINUE;
}
@@ -1320,6 +1398,6 @@ int kdeth_process_eager(struct hfi1_packet *packet)
int process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
- rhf_rcv_type(packet->rhf));
+ rhf_rcv_type(packet->rhf));
return RHF_RCV_CONTINUE;
}
diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/staging/rdma/hfi1/efivar.c
index 47dfe2584760..106349fc1fb9 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/staging/rdma/hfi1/efivar.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/staging/rdma/hfi1/efivar.h
index 070706225c51..94e9e70de568 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/staging/rdma/hfi1/efivar.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
index fb620c97f592..bd8771570f81 100644
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ b/drivers/staging/rdma/hfi1/eprom.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -99,17 +96,17 @@
/* sleep length while waiting for controller */
#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */
-#define COUNT_DELAY_SEC(n) ((n) * (1000000/WAIT_SLEEP_US))
+#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
/* GPIO pins */
-#define EPROM_WP_N (1ull << 14) /* EPROM write line */
+#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */
/*
- * Use the EP mutex to guard against other callers from within the driver.
- * Also covers usage of eprom_available.
+ * How long to wait for the EPROM to become available, in ms.
+ * The spec 32 Mb EPROM takes around 40s to erase then write.
+ * Double it for safety.
*/
-static DEFINE_MUTEX(eprom_mutex);
-static int eprom_available; /* default: not available */
+#define EPROM_TIMEOUT 80000 /* ms */
/*
* Turn on external enable line that allows writing on the flash.
@@ -117,11 +114,9 @@ static int eprom_available; /* default: not available */
static void write_enable(struct hfi1_devdata *dd)
{
/* raise signal */
- write_csr(dd, ASIC_GPIO_OUT,
- read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
+ write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
/* raise enable */
- write_csr(dd, ASIC_GPIO_OE,
- read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
+ write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
}
/*
@@ -130,11 +125,9 @@ static void write_enable(struct hfi1_devdata *dd)
static void write_disable(struct hfi1_devdata *dd)
{
/* lower signal */
- write_csr(dd, ASIC_GPIO_OUT,
- read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
+ write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
/* lower enable */
- write_csr(dd, ASIC_GPIO_OE,
- read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
+ write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
}
/*
@@ -212,8 +205,8 @@ static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len)
/* check the end points for the minimum erase */
if ((start & MASK_4KB) || (end & MASK_4KB)) {
dd_dev_err(dd,
- "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
- __func__, start, end);
+ "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
+ __func__, start, end);
return -EINVAL;
}
@@ -256,7 +249,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
int i;
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
- for (i = 0; i < EP_PAGE_SIZE/sizeof(u32); i++)
+ for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
}
@@ -267,7 +260,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
{
u32 offset;
- u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+ u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
int ret = 0;
/* reject anything not on an EPROM page boundary */
@@ -277,7 +270,7 @@ static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
read_page(dd, start + offset, buffer);
if (copy_to_user((void __user *)(addr + offset),
- buffer, EP_PAGE_SIZE)) {
+ buffer, EP_PAGE_SIZE)) {
ret = -EFAULT;
goto done;
}
@@ -298,7 +291,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
write_csr(dd, ASIC_EEP_DATA, data[0]);
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
- for (i = 1; i < EP_PAGE_SIZE/sizeof(u32); i++)
+ for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
write_csr(dd, ASIC_EEP_DATA, data[i]);
/* will close the open page */
return wait_for_not_busy(dd);
@@ -310,7 +303,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
{
u32 offset;
- u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+ u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
int ret = 0;
/* reject anything not on an EPROM page boundary */
@@ -321,7 +314,7 @@ static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
if (copy_from_user(buffer, (void __user *)(addr + offset),
- EP_PAGE_SIZE)) {
+ EP_PAGE_SIZE)) {
ret = -EFAULT;
goto done;
}
@@ -353,44 +346,42 @@ static inline u32 extract_rstart(u32 composite)
*
* Return 0 on success, -ERRNO on error
*/
-int handle_eprom_command(const struct hfi1_cmd *cmd)
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
{
struct hfi1_devdata *dd;
u32 dev_id;
u32 rlen; /* range length */
u32 rstart; /* range start */
+ int i_minor;
int ret = 0;
/*
- * The EPROM is per-device, so use unit 0 as that will always
- * exist.
+ * Map the device file to device data using the relative minor.
+ * The device file minor number is the unit number + 1. 0 is
+ * the generic device file - reject it.
*/
- dd = hfi1_lookup(0);
+ i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
+ if (i_minor <= 0)
+ return -EINVAL;
+ dd = hfi1_lookup(i_minor - 1);
if (!dd) {
- pr_err("%s: cannot find unit 0!\n", __func__);
+ pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
return -EINVAL;
}
- /* lock against other callers touching the ASIC block */
- mutex_lock(&eprom_mutex);
-
- /* some platforms do not have an EPROM */
- if (!eprom_available) {
- ret = -ENOSYS;
- goto done_asic;
- }
+ /* some devices do not have an EPROM */
+ if (!dd->eprom_available)
+ return -EOPNOTSUPP;
- /* lock against the other HFI on another OS */
- ret = acquire_hw_mutex(dd);
+ ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
if (ret) {
- dd_dev_err(dd,
- "%s: unable to acquire hw mutex, no EPROM support\n",
- __func__);
+ dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
+ __func__);
goto done_asic;
}
dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
- __func__, cmd->type, cmd->len, cmd->addr);
+ __func__, cmd->type, cmd->len, cmd->addr);
switch (cmd->type) {
case HFI1_CMD_EP_INFO:
@@ -401,7 +392,7 @@ int handle_eprom_command(const struct hfi1_cmd *cmd)
dev_id = read_device_id(dd);
/* addr points to a u32 user buffer */
if (copy_to_user((void __user *)cmd->addr, &dev_id,
- sizeof(u32)))
+ sizeof(u32)))
ret = -EFAULT;
break;
@@ -429,14 +420,13 @@ int handle_eprom_command(const struct hfi1_cmd *cmd)
default:
dd_dev_err(dd, "%s: unexpected command %d\n",
- __func__, cmd->type);
+ __func__, cmd->type);
ret = -EINVAL;
break;
}
- release_hw_mutex(dd);
+ release_chip_resource(dd, CR_EPROM);
done_asic:
- mutex_unlock(&eprom_mutex);
return ret;
}
@@ -447,44 +437,35 @@ int eprom_init(struct hfi1_devdata *dd)
{
int ret = 0;
- /* only the discrete chip has an EPROM, nothing to do */
+ /* only the discrete chip has an EPROM */
if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
return 0;
- /* lock against other callers */
- mutex_lock(&eprom_mutex);
- if (eprom_available) /* already initialized */
- goto done_asic;
-
/*
- * Lock against the other HFI on another OS - the mutex above
- * would have caught anything in this driver. It is OK if
- * both OSes reset the EPROM - as long as they don't do it at
- * the same time.
+ * It is OK if both HFIs reset the EPROM as long as they don't
+ * do it at the same time.
*/
- ret = acquire_hw_mutex(dd);
+ ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
if (ret) {
dd_dev_err(dd,
- "%s: unable to acquire hw mutex, no EPROM support\n",
- __func__);
+ "%s: unable to acquire EPROM resource, no EPROM support\n",
+ __func__);
goto done_asic;
}
/* reset EPROM to be sure it is in a good state */
/* set reset */
- write_csr(dd, ASIC_EEP_CTL_STAT,
- ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
+ write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
/* clear reset, set speed */
write_csr(dd, ASIC_EEP_CTL_STAT,
- EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
+ EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
/* wake the device with command "release powerdown NoID" */
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
- eprom_available = 1;
- release_hw_mutex(dd);
+ dd->eprom_available = true;
+ release_chip_resource(dd, CR_EPROM);
done_asic:
- mutex_unlock(&eprom_mutex);
return ret;
}
diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/staging/rdma/hfi1/eprom.h
index 64a64276be81..d41f0b1afb15 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/staging/rdma/hfi1/eprom.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -52,4 +49,4 @@ struct hfi1_cmd;
struct hfi1_devdata;
int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(const struct hfi1_cmd *cmd);
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index 8b911e8bf0df..8396dc5fb6c1 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -60,6 +57,8 @@
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "eprom.h"
+#include "aspm.h"
+#include "mmu_rb.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -96,9 +95,6 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
static int vma_fault(struct vm_area_struct *, struct vm_fault *);
-static int exp_tid_setup(struct file *, struct hfi1_tid_info *);
-static int exp_tid_free(struct file *, struct hfi1_tid_info *);
-static void unlock_exp_tids(struct hfi1_ctxtdata *);
static const struct file_operations hfi1_file_ops = {
.owner = THIS_MODULE,
@@ -164,7 +160,6 @@ enum mmap_types {
#define dbg(fmt, ...) \
pr_info(fmt, ##__VA_ARGS__)
-
static inline int is_valid_mmap(u64 token)
{
return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
@@ -188,6 +183,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
struct hfi1_cmd cmd;
struct hfi1_user_info uinfo;
struct hfi1_tid_info tinfo;
+ unsigned long addr;
ssize_t consumed = 0, copy = 0, ret = 0;
void *dest = NULL;
__u64 user_val = 0;
@@ -219,6 +215,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
break;
case HFI1_CMD_TID_UPDATE:
case HFI1_CMD_TID_FREE:
+ case HFI1_CMD_TID_INVAL_READ:
copy = sizeof(tinfo);
dest = &tinfo;
break;
@@ -294,9 +291,8 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
sc_return_credits(uctxt->sc);
break;
case HFI1_CMD_TID_UPDATE:
- ret = exp_tid_setup(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
if (!ret) {
- unsigned long addr;
/*
* Copy the number of tidlist entries we used
* and the length of the buffer we registered.
@@ -311,8 +307,25 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
ret = -EFAULT;
}
break;
+ case HFI1_CMD_TID_INVAL_READ:
+ ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+ if (ret)
+ break;
+ addr = (unsigned long)cmd.addr +
+ offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+ ret = -EFAULT;
+ break;
case HFI1_CMD_TID_FREE:
- ret = exp_tid_free(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+ if (ret)
+ break;
+ addr = (unsigned long)cmd.addr +
+ offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+ ret = -EFAULT;
break;
case HFI1_CMD_RECV_CTRL:
ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
@@ -373,8 +386,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
break;
}
if (dd->flags & HFI1_FORCED_FREEZE) {
- /* Don't allow context reset if we are into
- * forced freeze */
+ /*
+ * Don't allow context reset if we are into
+ * forced freeze
+ */
ret = -ENODEV;
break;
}
@@ -382,8 +397,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
ret = sc_enable(sc);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
uctxt->ctxt);
- } else
+ } else {
ret = sc_restart(sc);
+ }
if (!ret)
sc_return_credits(sc);
break;
@@ -393,7 +409,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
case HFI1_CMD_EP_ERASE_RANGE:
case HFI1_CMD_EP_READ_RANGE:
case HFI1_CMD_EP_WRITE_RANGE:
- ret = handle_eprom_command(&cmd);
+ ret = handle_eprom_command(fp, &cmd);
break;
}
@@ -732,6 +748,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* drain user sdma queue */
hfi1_user_sdma_free_queues(fdata);
+ /* release the cpu */
+ hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+
/*
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
@@ -755,6 +774,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
HFI1_RCVCTRL_TIDFLOW_DIS |
HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
@@ -777,14 +797,12 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
uctxt->pionowait = 0;
uctxt->event_flags = 0;
- hfi1_clear_tids(uctxt);
+ hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
- if (uctxt->tid_pg_list)
- unlock_exp_tids(uctxt);
-
hfi1_stats.sps_ctxts--;
- dd->freectxts++;
+ if (++dd->freectxts == dd->num_user_contexts)
+ aspm_enable_all(dd);
mutex_unlock(&hfi1_mutex);
hfi1_free_ctxtdata(dd, uctxt);
done:
@@ -826,8 +844,16 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
mutex_lock(&hfi1_mutex);
/* First, lets check if we need to setup a shared context? */
- if (uinfo->subctxt_cnt)
+ if (uinfo->subctxt_cnt) {
+ struct hfi1_filedata *fd = fp->private_data;
+
ret = find_shared_ctxt(fp, uinfo);
+ if (ret < 0)
+ goto done_unlock;
+ if (ret)
+ fd->rec_cpu_num = hfi1_get_proc_affinity(
+ fd->uctxt->dd, fd->uctxt->numa_id);
+ }
/*
* We execute the following block if we couldn't find a
@@ -837,6 +863,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
ret = get_user_context(fp, uinfo, i_minor - 1, alg);
}
+done_unlock:
mutex_unlock(&hfi1_mutex);
done:
return ret;
@@ -962,7 +989,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt;
unsigned ctxt;
- int ret;
+ int ret, numa;
if (dd->flags & HFI1_FROZEN) {
/*
@@ -982,17 +1009,26 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
if (ctxt == dd->num_rcv_contexts)
return -EBUSY;
- uctxt = hfi1_create_ctxtdata(dd->pport, ctxt);
+ fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+ if (fd->rec_cpu_num != -1)
+ numa = cpu_to_node(fd->rec_cpu_num);
+ else
+ numa = numa_node_id();
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
if (!uctxt) {
dd_dev_err(dd,
"Unable to allocate ctxtdata memory, failing open\n");
return -ENOMEM;
}
+ hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
+ uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
+ uctxt->numa_id);
+
/*
* Allocate and enable a PIO send context.
*/
uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
- uctxt->numa_id);
+ uctxt->dd->node);
if (!uctxt->sc)
return -ENOMEM;
@@ -1026,7 +1062,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
INIT_LIST_HEAD(&uctxt->sdma_queues);
spin_lock_init(&uctxt->sdma_qlock);
hfi1_stats.sps_ctxts++;
- dd->freectxts--;
+ /*
+ * Disable ASPM when there are open user/PSM contexts to avoid
+ * issues with ASPM L1 exit latency
+ */
+ if (dd->freectxts-- == dd->num_user_contexts)
+ aspm_disable_all(dd);
fd->uctxt = uctxt;
return 0;
@@ -1035,22 +1076,19 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
- int ret = 0;
unsigned num_subctxts;
num_subctxts = uinfo->subctxt_cnt;
- if (num_subctxts > HFI1_MAX_SHARED_CTXTS) {
- ret = -EINVAL;
- goto bail;
- }
+ if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
+ return -EINVAL;
uctxt->subctxt_cnt = uinfo->subctxt_cnt;
uctxt->subctxt_id = uinfo->subctxt_id;
uctxt->active_slaves = 1;
uctxt->redirect_seq_cnt = 1;
set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-bail:
- return ret;
+
+ return 0;
}
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
@@ -1105,10 +1143,10 @@ static int user_init(struct file *fp)
* has done it.
*/
if (fd->subctxt) {
- ret = wait_event_interruptible(uctxt->wait,
- !test_bit(HFI1_CTXT_MASTER_UNINIT,
- &uctxt->event_flags));
- goto done;
+ ret = wait_event_interruptible(uctxt->wait, !test_bit(
+ HFI1_CTXT_MASTER_UNINIT,
+ &uctxt->event_flags));
+ goto expected;
}
/* initialize poll variables... */
@@ -1146,8 +1184,16 @@ static int user_init(struct file *fp)
rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ /*
+ * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
+ * We can't rely on the correct value to be set from prior
+ * uses of the chip or ctxt. Therefore, add the rcvctrl op
+ * for both cases.
+ */
if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+ else
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
/* Notify any waiting slaves */
@@ -1155,8 +1201,18 @@ static int user_init(struct file *fp)
clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
- ret = 0;
+expected:
+ /*
+ * Expected receive has to be setup for all processes (including
+ * shared contexts). However, it has to be done after the master
+ * context has been fully configured as it depends on the
+ * eager/expected split of the RcvArray entries.
+ * Setting it up here ensures that the subcontexts will be waiting
+ * (due to the above wait_event_interruptible() until the master
+ * is setup.
+ */
+ ret = hfi1_user_exp_rcv_init(fp);
done:
return ret;
}
@@ -1226,46 +1282,6 @@ static int setup_ctxt(struct file *fp)
if (ret)
goto done;
}
- /* Setup Expected Rcv memories */
- uctxt->tid_pg_list = vzalloc(uctxt->expected_count *
- sizeof(struct page **));
- if (!uctxt->tid_pg_list) {
- ret = -ENOMEM;
- goto done;
- }
- uctxt->physshadow = vzalloc(uctxt->expected_count *
- sizeof(*uctxt->physshadow));
- if (!uctxt->physshadow) {
- ret = -ENOMEM;
- goto done;
- }
- /* allocate expected TID map and initialize the cursor */
- atomic_set(&uctxt->tidcursor, 0);
- uctxt->numtidgroups = uctxt->expected_count /
- dd->rcv_entries.group_size;
- uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG +
- !!(uctxt->numtidgroups % BITS_PER_LONG);
- uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt *
- sizeof(*uctxt->tidusemap),
- GFP_KERNEL, uctxt->numa_id);
- if (!uctxt->tidusemap) {
- ret = -ENOMEM;
- goto done;
- }
- /*
- * In case that the number of groups is not a multiple of
- * 64 (the number of groups in a tidusemap element), mark
- * the extra ones as used. This will effectively make them
- * permanently used and should never be assigned. Otherwise,
- * the code which checks how many free groups we have will
- * get completely confused about the state of the bits.
- */
- if (uctxt->numtidgroups % BITS_PER_LONG)
- uctxt->tidusemap[uctxt->tidmapcnt - 1] =
- ~((1ULL << (uctxt->numtidgroups %
- BITS_PER_LONG)) - 1);
- trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0,
- uctxt->tidusemap, uctxt->tidmapcnt);
}
ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
if (ret)
@@ -1391,8 +1407,9 @@ static unsigned int poll_next(struct file *fp,
set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
pollflag = 0;
- } else
+ } else {
pollflag = POLLIN | POLLRDNORM;
+ }
spin_unlock_irq(&dd->uctxt_lock);
return pollflag;
@@ -1470,8 +1487,9 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
if (uctxt->rcvhdrtail_kvaddr)
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
- } else
+ } else {
rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
+ }
hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
/* always; new head should be equal to new tail; see above */
bail:
@@ -1504,367 +1522,6 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
return 0;
}
-#define num_user_pages(vaddr, len) \
- (1 + (((((unsigned long)(vaddr) + \
- (unsigned long)(len) - 1) & PAGE_MASK) - \
- ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-
-/**
- * tzcnt - count the number of trailing zeros in a 64bit value
- * @value: the value to be examined
- *
- * Returns the number of trailing least significant zeros in the
- * the input value. If the value is zero, return the number of
- * bits of the value.
- */
-static inline u8 tzcnt(u64 value)
-{
- return value ? __builtin_ctzl(value) : sizeof(value) * 8;
-}
-
-static inline unsigned num_free_groups(unsigned long map, u16 *start)
-{
- unsigned free;
- u16 bitidx = *start;
-
- if (bitidx >= BITS_PER_LONG)
- return 0;
- /* "Turn off" any bits set before our bit index */
- map &= ~((1ULL << bitidx) - 1);
- free = tzcnt(map) - bitidx;
- while (!free && bitidx < BITS_PER_LONG) {
- /* Zero out the last set bit so we look at the rest */
- map &= ~(1ULL << bitidx);
- /*
- * Account for the previously checked bits and advance
- * the bit index. We don't have to check for bitidx
- * getting bigger than BITS_PER_LONG here as it would
- * mean extra instructions that we don't need. If it
- * did happen, it would push free to a negative value
- * which will break the loop.
- */
- free = tzcnt(map) - ++bitidx;
- }
- *start = bitidx;
- return free;
-}
-
-static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo)
-{
- int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned tid, mapped = 0, npages, ngroups, exp_groups,
- tidpairs = uctxt->expected_count / 2;
- struct page **pages;
- unsigned long vaddr, tidmap[uctxt->tidmapcnt];
- dma_addr_t *phys;
- u32 tidlist[tidpairs], pairidx = 0, tidcursor;
- u16 useidx, idx, bitidx, tidcnt = 0;
-
- vaddr = tinfo->vaddr;
-
- if (offset_in_page(vaddr)) {
- ret = -EINVAL;
- goto bail;
- }
-
- npages = num_user_pages(vaddr, tinfo->length);
- if (!npages) {
- ret = -EINVAL;
- goto bail;
- }
- if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- ret = -EFAULT;
- goto bail;
- }
-
- memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt);
- memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs);
-
- exp_groups = uctxt->expected_count / dd->rcv_entries.group_size;
- /* which group set do we look at first? */
- tidcursor = atomic_read(&uctxt->tidcursor);
- useidx = (tidcursor >> 16) & 0xffff;
- bitidx = tidcursor & 0xffff;
-
- /*
- * Keep going until we've mapped all pages or we've exhausted all
- * RcvArray entries.
- * This iterates over the number of tidmaps + 1
- * (idx <= uctxt->tidmapcnt) so we check the bitmap which we
- * started from one more time for any free bits before the
- * starting point bit.
- */
- for (mapped = 0, idx = 0;
- mapped < npages && idx <= uctxt->tidmapcnt;) {
- u64 i, offset = 0;
- unsigned free, pinned, pmapped = 0, bits_used;
- u16 grp;
-
- /*
- * "Reserve" the needed group bits under lock so other
- * processes can't step in the middle of it. Once
- * reserved, we don't need the lock anymore since we
- * are guaranteed the groups.
- */
- spin_lock(&uctxt->exp_lock);
- if (uctxt->tidusemap[useidx] == -1ULL ||
- bitidx >= BITS_PER_LONG) {
- /* no free groups in the set, use the next */
- useidx = (useidx + 1) % uctxt->tidmapcnt;
- idx++;
- bitidx = 0;
- spin_unlock(&uctxt->exp_lock);
- continue;
- }
- ngroups = ((npages - mapped) / dd->rcv_entries.group_size) +
- !!((npages - mapped) % dd->rcv_entries.group_size);
-
- /*
- * If we've gotten here, the current set of groups does have
- * one or more free groups.
- */
- free = num_free_groups(uctxt->tidusemap[useidx], &bitidx);
- if (!free) {
- /*
- * Despite the check above, free could still come back
- * as 0 because we don't check the entire bitmap but
- * we start from bitidx.
- */
- spin_unlock(&uctxt->exp_lock);
- continue;
- }
- bits_used = min(free, ngroups);
- tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx;
- uctxt->tidusemap[useidx] |= tidmap[useidx];
- spin_unlock(&uctxt->exp_lock);
-
- /*
- * At this point, we know where in the map we have free bits.
- * properly offset into the various "shadow" arrays and compute
- * the RcvArray entry index.
- */
- offset = ((useidx * BITS_PER_LONG) + bitidx) *
- dd->rcv_entries.group_size;
- pages = uctxt->tid_pg_list + offset;
- phys = uctxt->physshadow + offset;
- tid = uctxt->expected_base + offset;
-
- /* Calculate how many pages we can pin based on free bits */
- pinned = min((bits_used * dd->rcv_entries.group_size),
- (npages - mapped));
- /*
- * Now that we know how many free RcvArray entries we have,
- * we can pin that many user pages.
- */
- ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE),
- pinned, true, pages);
- if (ret) {
- /*
- * We can't continue because the pages array won't be
- * initialized. This should never happen,
- * unless perhaps the user has mpin'ed the pages
- * themselves.
- */
- dd_dev_info(dd,
- "Failed to lock addr %p, %u pages: errno %d\n",
- (void *) vaddr, pinned, -ret);
- /*
- * Let go of the bits that we reserved since we are not
- * going to use them.
- */
- spin_lock(&uctxt->exp_lock);
- uctxt->tidusemap[useidx] &=
- ~(((1ULL << bits_used) - 1) << bitidx);
- spin_unlock(&uctxt->exp_lock);
- goto done;
- }
- /*
- * How many groups do we need based on how many pages we have
- * pinned?
- */
- ngroups = (pinned / dd->rcv_entries.group_size) +
- !!(pinned % dd->rcv_entries.group_size);
- /*
- * Keep programming RcvArray entries for all the <ngroups> free
- * groups.
- */
- for (i = 0, grp = 0; grp < ngroups; i++, grp++) {
- unsigned j;
- u32 pair_size = 0, tidsize;
- /*
- * This inner loop will program an entire group or the
- * array of pinned pages (which ever limit is hit
- * first).
- */
- for (j = 0; j < dd->rcv_entries.group_size &&
- pmapped < pinned; j++, pmapped++, tid++) {
- tidsize = PAGE_SIZE;
- phys[pmapped] = hfi1_map_page(dd->pcidev,
- pages[pmapped], 0,
- tidsize, PCI_DMA_FROMDEVICE);
- trace_hfi1_exp_rcv_set(uctxt->ctxt,
- fd->subctxt,
- tid, vaddr,
- phys[pmapped],
- pages[pmapped]);
- /*
- * Each RcvArray entry is programmed with one
- * page * worth of memory. This will handle
- * the 8K MTU as well as anything smaller
- * due to the fact that both entries in the
- * RcvTidPair are programmed with a page.
- * PSM currently does not handle anything
- * bigger than 8K MTU, so should we even worry
- * about 10K here?
- */
- hfi1_put_tid(dd, tid, PT_EXPECTED,
- phys[pmapped],
- ilog2(tidsize >> PAGE_SHIFT) + 1);
- pair_size += tidsize >> PAGE_SHIFT;
- EXP_TID_RESET(tidlist[pairidx], LEN, pair_size);
- if (!(tid % 2)) {
- tidlist[pairidx] |=
- EXP_TID_SET(IDX,
- (tid - uctxt->expected_base)
- / 2);
- tidlist[pairidx] |=
- EXP_TID_SET(CTRL, 1);
- tidcnt++;
- } else {
- tidlist[pairidx] |=
- EXP_TID_SET(CTRL, 2);
- pair_size = 0;
- pairidx++;
- }
- }
- /*
- * We've programmed the entire group (or as much of the
- * group as we'll use. Now, it's time to push it out...
- */
- flush_wc();
- }
- mapped += pinned;
- atomic_set(&uctxt->tidcursor,
- (((useidx & 0xffffff) << 16) |
- ((bitidx + bits_used) & 0xffffff)));
- }
- trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap,
- uctxt->tidmapcnt);
-
-done:
- /* If we've mapped anything, copy relevant info to user */
- if (mapped) {
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
- tidlist, sizeof(tidlist[0]) * tidcnt)) {
- ret = -EFAULT;
- goto done;
- }
- /* copy TID info to user */
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap,
- tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt))
- ret = -EFAULT;
- }
-bail:
- /*
- * Calculate mapped length. New Exp TID protocol does not "unwind" and
- * report an error if it can't map the entire buffer. It just reports
- * the length that was mapped.
- */
- tinfo->length = mapped * PAGE_SIZE;
- tinfo->tidcnt = tidcnt;
- return ret;
-}
-
-static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo)
-{
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned long tidmap[uctxt->tidmapcnt];
- struct page **pages;
- dma_addr_t *phys;
- u16 idx, bitidx, tid;
- int ret = 0;
-
- if (copy_from_user(&tidmap, (void __user *)(unsigned long)
- tinfo->tidmap,
- sizeof(tidmap[0]) * uctxt->tidmapcnt)) {
- ret = -EFAULT;
- goto done;
- }
- for (idx = 0; idx < uctxt->tidmapcnt; idx++) {
- unsigned long map;
-
- bitidx = 0;
- if (!tidmap[idx])
- continue;
- map = tidmap[idx];
- while ((bitidx = tzcnt(map)) < BITS_PER_LONG) {
- int i, pcount = 0;
- struct page *pshadow[dd->rcv_entries.group_size];
- unsigned offset = ((idx * BITS_PER_LONG) + bitidx) *
- dd->rcv_entries.group_size;
-
- pages = uctxt->tid_pg_list + offset;
- phys = uctxt->physshadow + offset;
- tid = uctxt->expected_base + offset;
- for (i = 0; i < dd->rcv_entries.group_size;
- i++, tid++) {
- if (pages[i]) {
- hfi1_put_tid(dd, tid, PT_INVALID,
- 0, 0);
- trace_hfi1_exp_rcv_free(uctxt->ctxt,
- fd->subctxt,
- tid, phys[i],
- pages[i]);
- pci_unmap_page(dd->pcidev, phys[i],
- PAGE_SIZE, PCI_DMA_FROMDEVICE);
- pshadow[pcount] = pages[i];
- pages[i] = NULL;
- pcount++;
- phys[i] = 0;
- }
- }
- flush_wc();
- hfi1_release_user_pages(pshadow, pcount, true);
- clear_bit(bitidx, &uctxt->tidusemap[idx]);
- map &= ~(1ULL<<bitidx);
- }
- }
- trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap,
- uctxt->tidmapcnt);
-done:
- return ret;
-}
-
-static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt)
-{
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned tid;
-
- dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n",
- uctxt->ctxt);
- for (tid = 0; tid < uctxt->expected_count; tid++) {
- struct page *p = uctxt->tid_pg_list[tid];
- dma_addr_t phys;
-
- if (!p)
- continue;
-
- phys = uctxt->physshadow[tid];
- uctxt->physshadow[tid] = 0;
- uctxt->tid_pg_list[tid] = NULL;
- pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
- hfi1_release_user_pages(&p, 1, true);
- }
-}
-
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
u16 pkey)
{
@@ -1933,10 +1590,9 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
return filp->f_pos;
}
-
/* NOTE: assumes unsigned long is 8 bytes */
static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
- loff_t *f_pos)
+ loff_t *f_pos)
{
struct hfi1_devdata *dd = filp->private_data;
void __iomem *base = dd->kregbase;
@@ -1972,12 +1628,12 @@ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
* them. These registers are defined as having a read value
* of 0.
*/
- else if (csr_off == ASIC_GPIO_CLEAR
- || csr_off == ASIC_GPIO_FORCE
- || csr_off == ASIC_QSFP1_CLEAR
- || csr_off == ASIC_QSFP1_FORCE
- || csr_off == ASIC_QSFP2_CLEAR
- || csr_off == ASIC_QSFP2_FORCE)
+ else if (csr_off == ASIC_GPIO_CLEAR ||
+ csr_off == ASIC_GPIO_FORCE ||
+ csr_off == ASIC_QSFP1_CLEAR ||
+ csr_off == ASIC_QSFP1_FORCE ||
+ csr_off == ASIC_QSFP2_CLEAR ||
+ csr_off == ASIC_QSFP2_FORCE)
data = 0;
else if (csr_off >= barlen) {
/*
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c
index 28ae42faa018..3040162cb326 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/staging/rdma/hfi1/firmware.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -77,7 +74,13 @@ static uint fw_8051_load = 1;
static uint fw_fabric_serdes_load = 1;
static uint fw_pcie_serdes_load = 1;
static uint fw_sbus_load = 1;
-static uint platform_config_load = 1;
+
+/*
+ * Access required in platform.c
+ * Maintains state of whether the platform config was fetched via the
+ * fallback option
+ */
+uint platform_config_load;
/* Firmware file names get set in hfi1_firmware_init() based on the above */
static char *fw_8051_name;
@@ -107,6 +110,7 @@ struct css_header {
u32 exponent_size; /* in DWORDs */
u32 reserved[22];
};
+
/* expected field values */
#define CSS_MODULE_TYPE 0x00000006
#define CSS_HEADER_LEN 0x000000a1
@@ -166,6 +170,7 @@ enum fw_state {
FW_FINAL,
FW_ERR
};
+
static enum fw_state fw_state = FW_EMPTY;
static int fw_err;
static struct firmware_details fw_8051;
@@ -193,7 +198,7 @@ static const struct firmware *platform_config;
#define RSA_ENGINE_TIMEOUT 100 /* ms */
/* hardware mutex timeout, in ms */
-#define HM_TIMEOUT 4000 /* 4 s */
+#define HM_TIMEOUT 10 /* ms */
/* 8051 memory access timeout, in us */
#define DC8051_ACCESS_TIMEOUT 100 /* us */
@@ -233,6 +238,8 @@ static const u8 all_pcie_serdes_broadcast = 0xe0;
/* forwards */
static void dispose_one_firmware(struct firmware_details *fdet);
+static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
+ struct firmware_details *fdet);
/*
* Read a single 64-bit value from 8051 data memory.
@@ -372,8 +379,8 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what,
return 0;
dd_dev_err(dd,
- "invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
- what, expected, actual);
+ "invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
+ what, expected, actual);
return 1;
}
@@ -383,19 +390,19 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what,
static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css)
{
/* verify CSS header fields (most sizes are in DW, so add /4) */
- if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE)
- || invalid_header(dd, "header_len", css->header_len,
- (sizeof(struct firmware_file)/4))
- || invalid_header(dd, "header_version",
- css->header_version, CSS_HEADER_VERSION)
- || invalid_header(dd, "module_vendor",
- css->module_vendor, CSS_MODULE_VENDOR)
- || invalid_header(dd, "key_size",
- css->key_size, KEY_SIZE/4)
- || invalid_header(dd, "modulus_size",
- css->modulus_size, KEY_SIZE/4)
- || invalid_header(dd, "exponent_size",
- css->exponent_size, EXPONENT_SIZE/4)) {
+ if (invalid_header(dd, "module_type", css->module_type,
+ CSS_MODULE_TYPE) ||
+ invalid_header(dd, "header_len", css->header_len,
+ (sizeof(struct firmware_file) / 4)) ||
+ invalid_header(dd, "header_version", css->header_version,
+ CSS_HEADER_VERSION) ||
+ invalid_header(dd, "module_vendor", css->module_vendor,
+ CSS_MODULE_VENDOR) ||
+ invalid_header(dd, "key_size", css->key_size, KEY_SIZE / 4) ||
+ invalid_header(dd, "modulus_size", css->modulus_size,
+ KEY_SIZE / 4) ||
+ invalid_header(dd, "exponent_size", css->exponent_size,
+ EXPONENT_SIZE / 4)) {
return -EINVAL;
}
return 0;
@@ -410,8 +417,8 @@ static int payload_check(struct hfi1_devdata *dd, const char *name,
/* make sure we have some payload */
if (prefix_size >= file_size) {
dd_dev_err(dd,
- "firmware \"%s\", size %ld, must be larger than %ld bytes\n",
- name, file_size, prefix_size);
+ "firmware \"%s\", size %ld, must be larger than %ld bytes\n",
+ name, file_size, prefix_size);
return -EINVAL;
}
@@ -433,8 +440,8 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
ret = request_firmware(&fdet->fw, name, &dd->pcidev->dev);
if (ret) {
- dd_dev_err(dd, "cannot find firmware \"%s\", err %d\n",
- name, ret);
+ dd_dev_warn(dd, "cannot find firmware \"%s\", err %d\n",
+ name, ret);
return ret;
}
@@ -480,14 +487,14 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
ret = verify_css_header(dd, css);
if (ret) {
dd_dev_info(dd, "Invalid CSS header for \"%s\"\n", name);
- } else if ((css->size*4) == fdet->fw->size) {
+ } else if ((css->size * 4) == fdet->fw->size) {
/* non-augmented firmware file */
struct firmware_file *ff = (struct firmware_file *)
fdet->fw->data;
/* make sure there are bytes in the payload */
ret = payload_check(dd, name, fdet->fw->size,
- sizeof(struct firmware_file));
+ sizeof(struct firmware_file));
if (ret == 0) {
fdet->css_header = css;
fdet->modulus = ff->modulus;
@@ -505,14 +512,14 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
dd_dev_err(dd, "driver is unable to validate firmware without r2 and mu (not in firmware file)\n");
ret = -EINVAL;
}
- } else if ((css->size*4) + AUGMENT_SIZE == fdet->fw->size) {
+ } else if ((css->size * 4) + AUGMENT_SIZE == fdet->fw->size) {
/* augmented firmware file */
struct augmented_firmware_file *aff =
(struct augmented_firmware_file *)fdet->fw->data;
/* make sure there are bytes in the payload */
ret = payload_check(dd, name, fdet->fw->size,
- sizeof(struct augmented_firmware_file));
+ sizeof(struct augmented_firmware_file));
if (ret == 0) {
fdet->css_header = css;
fdet->modulus = aff->modulus;
@@ -527,9 +534,10 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
} else {
/* css->size check failed */
dd_dev_err(dd,
- "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
- fdet->fw->size/4, (fdet->fw->size - AUGMENT_SIZE)/4,
- css->size);
+ "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
+ fdet->fw->size / 4,
+ (fdet->fw->size - AUGMENT_SIZE) / 4,
+ css->size);
ret = -EINVAL;
}
@@ -572,7 +580,7 @@ retry:
* We tried the original and it failed. Move to the
* alternate.
*/
- dd_dev_info(dd, "using alternate firmware names\n");
+ dd_dev_warn(dd, "using alternate firmware names\n");
/*
* Let others run. Some systems, when missing firmware, does
* something that holds for 30 seconds. If we do that twice
@@ -593,27 +601,27 @@ retry:
fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
}
- if (fw_8051_load) {
- err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
+ if (fw_sbus_load) {
+ err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
if (err)
goto done;
}
- if (fw_fabric_serdes_load) {
- err = obtain_one_firmware(dd, fw_fabric_serdes_name,
- &fw_fabric);
+ if (fw_pcie_serdes_load) {
+ err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie);
if (err)
goto done;
}
- if (fw_sbus_load) {
- err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
+ if (fw_fabric_serdes_load) {
+ err = obtain_one_firmware(dd, fw_fabric_serdes_name,
+ &fw_fabric);
if (err)
goto done;
}
- if (fw_pcie_serdes_load) {
- err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie);
+ if (fw_8051_load) {
+ err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
if (err)
goto done;
}
@@ -621,16 +629,18 @@ retry:
done:
if (err) {
/* oops, had problems obtaining a firmware */
- if (fw_state == FW_EMPTY) {
- /* retry with alternate */
+ if (fw_state == FW_EMPTY && dd->icode == ICODE_RTL_SILICON) {
+ /* retry with alternate (RTL only) */
fw_state = FW_TRY;
goto retry;
}
+ dd_dev_err(dd, "unable to obtain working firmware\n");
fw_state = FW_ERR;
fw_err = -ENOENT;
} else {
/* success */
- if (fw_state == FW_EMPTY)
+ if (fw_state == FW_EMPTY &&
+ dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
fw_state = FW_TRY; /* may retry later */
else
fw_state = FW_FINAL; /* cannot try again */
@@ -673,10 +683,15 @@ static int obtain_firmware(struct hfi1_devdata *dd)
}
/* not in FW_TRY state */
- if (fw_state == FW_FINAL)
+ if (fw_state == FW_FINAL) {
+ if (platform_config) {
+ dd->platform_config.data = platform_config->data;
+ dd->platform_config.size = platform_config->size;
+ }
goto done; /* already acquired */
- else if (fw_state == FW_ERR)
+ } else if (fw_state == FW_ERR) {
goto done; /* already tried and failed */
+ }
/* fw_state is FW_EMPTY */
/* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */
@@ -685,9 +700,13 @@ static int obtain_firmware(struct hfi1_devdata *dd)
if (platform_config_load) {
platform_config = NULL;
err = request_firmware(&platform_config, platform_config_name,
- &dd->pcidev->dev);
- if (err)
+ &dd->pcidev->dev);
+ if (err) {
platform_config = NULL;
+ goto done;
+ }
+ dd->platform_config.data = platform_config->data;
+ dd->platform_config.size = platform_config->size;
}
done:
@@ -761,7 +780,7 @@ static int retry_firmware(struct hfi1_devdata *dd, int load_result)
static void write_rsa_data(struct hfi1_devdata *dd, int what,
const u8 *data, int nbytes)
{
- int qw_size = nbytes/8;
+ int qw_size = nbytes / 8;
int i;
if (((unsigned long)data & 0x7) == 0) {
@@ -769,14 +788,14 @@ static void write_rsa_data(struct hfi1_devdata *dd, int what,
u64 *ptr = (u64 *)data;
for (i = 0; i < qw_size; i++, ptr++)
- write_csr(dd, what + (8*i), *ptr);
+ write_csr(dd, what + (8 * i), *ptr);
} else {
/* not aligned */
for (i = 0; i < qw_size; i++, data += 8) {
u64 value;
memcpy(&value, data, 8);
- write_csr(dd, what + (8*i), value);
+ write_csr(dd, what + (8 * i), value);
}
}
}
@@ -789,7 +808,7 @@ static void write_streamed_rsa_data(struct hfi1_devdata *dd, int what,
const u8 *data, int nbytes)
{
u64 *ptr = (u64 *)data;
- int qw_size = nbytes/8;
+ int qw_size = nbytes / 8;
for (; qw_size > 0; qw_size--, ptr++)
write_csr(dd, what, *ptr);
@@ -822,7 +841,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
>> MISC_CFG_FW_CTRL_RSA_STATUS_SHIFT;
if (status != RSA_STATUS_IDLE) {
dd_dev_err(dd, "%s security engine not idle - giving up\n",
- who);
+ who);
return -EBUSY;
}
@@ -859,7 +878,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
if (status == RSA_STATUS_IDLE) {
/* should not happen */
dd_dev_err(dd, "%s firmware security bad idle state\n",
- who);
+ who);
ret = -EINVAL;
break;
} else if (status == RSA_STATUS_DONE) {
@@ -893,19 +912,20 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
* is not keeping the error high.
*/
write_csr(dd, MISC_ERR_CLEAR,
- MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK
- | MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
+ MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK |
+ MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
/*
- * All that is left are the current errors. Print failure details,
- * if any.
+ * All that is left are the current errors. Print warnings on
+ * authorization failure details, if any. Firmware authorization
+ * can be retried, so these are only warnings.
*/
reg = read_csr(dd, MISC_ERR_STATUS);
if (ret) {
if (reg & MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK)
- dd_dev_err(dd, "%s firmware authorization failed\n",
- who);
+ dd_dev_warn(dd, "%s firmware authorization failed\n",
+ who);
if (reg & MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK)
- dd_dev_err(dd, "%s firmware key mismatch\n", who);
+ dd_dev_warn(dd, "%s firmware key mismatch\n", who);
}
return ret;
@@ -922,7 +942,8 @@ static void load_security_variables(struct hfi1_devdata *dd,
write_rsa_data(dd, MISC_CFG_RSA_MU, fdet->mu, MU_SIZE);
/* Security variables d. Write the header */
write_streamed_rsa_data(dd, MISC_CFG_SHA_PRELOAD,
- (u8 *)fdet->css_header, sizeof(struct css_header));
+ (u8 *)fdet->css_header,
+ sizeof(struct css_header));
}
/* return the 8051 firmware state */
@@ -1002,7 +1023,7 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
/* Firmware load steps 3-5 */
ret = write_8051(dd, 1/*code*/, 0, fdet->firmware_ptr,
- fdet->firmware_len);
+ fdet->firmware_len);
if (ret)
return ret;
@@ -1029,13 +1050,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
ret = wait_fm_ready(dd, TIMEOUT_8051_START);
if (ret) { /* timed out */
dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
- get_firmware_state(dd));
+ get_firmware_state(dd));
return -ETIMEDOUT;
}
read_misc_status(dd, &ver_a, &ver_b);
dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
+ (int)ver_b, (int)ver_a);
dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
return 0;
@@ -1050,11 +1071,11 @@ void sbus_request(struct hfi1_devdata *dd,
u8 receiver_addr, u8 data_addr, u8 command, u32 data_in)
{
write_csr(dd, ASIC_CFG_SBUS_REQUEST,
- ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT)
- | ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT)
- | ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT)
- | ((u64)receiver_addr
- << ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
+ ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT) |
+ ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT) |
+ ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT) |
+ ((u64)receiver_addr <<
+ ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
}
/*
@@ -1072,14 +1093,14 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
return;
dd_dev_info(dd, "Turning off spicos:%s%s\n",
- flags & SPICO_SBUS ? " SBus" : "",
- flags & SPICO_FABRIC ? " fabric" : "");
+ flags & SPICO_SBUS ? " SBus" : "",
+ flags & SPICO_FABRIC ? " fabric" : "");
write_csr(dd, MISC_CFG_FW_CTRL, ENABLE_SPICO_SMASK);
/* disable SBus spico */
if (flags & SPICO_SBUS)
sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01,
- WRITE_SBUS_RECEIVER, 0x00000040);
+ WRITE_SBUS_RECEIVER, 0x00000040);
/* disable the fabric serdes spicos */
if (flags & SPICO_FABRIC)
@@ -1089,29 +1110,60 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
}
/*
- * Reset all of the fabric serdes for our HFI.
+ * Reset all of the fabric serdes for this HFI in preparation to take the
+ * link to Polling.
+ *
+ * To do a reset, we need to write to to the serdes registers. Unfortunately,
+ * the fabric serdes download to the other HFI on the ASIC will have turned
+ * off the firmware validation on this HFI. This means we can't write to the
+ * registers to reset the serdes. Work around this by performing a complete
+ * re-download and validation of the fabric serdes firmware. This, as a
+ * by-product, will reset the serdes. NOTE: the re-download requires that
+ * the 8051 be in the Offline state. I.e. not actively trying to use the
+ * serdes. This routine is called at the point where the link is Offline and
+ * is getting ready to go to Polling.
*/
void fabric_serdes_reset(struct hfi1_devdata *dd)
{
- u8 ra;
+ int ret;
- if (dd->icode != ICODE_RTL_SILICON) /* only for RTL */
+ if (!fw_fabric_serdes_load)
return;
- ra = fabric_serdes_broadcast[dd->hfi1_id];
-
- acquire_hw_mutex(dd);
+ ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+ if (ret) {
+ dd_dev_err(dd,
+ "Cannot acquire SBus resource to reset fabric SerDes - perhaps you should reboot\n");
+ return;
+ }
set_sbus_fast_mode(dd);
- /* place SerDes in reset and disable SPICO */
- sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
- /* wait 100 refclk cycles @ 156.25MHz => 640ns */
- udelay(1);
- /* remove SerDes reset */
- sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
- /* turn SPICO enable on */
- sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+
+ if (is_ax(dd)) {
+ /* A0 serdes do not work with a re-download */
+ u8 ra = fabric_serdes_broadcast[dd->hfi1_id];
+
+ /* place SerDes in reset and disable SPICO */
+ sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
+ /* wait 100 refclk cycles @ 156.25MHz => 640ns */
+ udelay(1);
+ /* remove SerDes reset */
+ sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
+ /* turn SPICO enable on */
+ sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+ } else {
+ turn_off_spicos(dd, SPICO_FABRIC);
+ /*
+ * No need for firmware retry - what to download has already
+ * been decided.
+ * No need to pay attention to the load return - the only
+ * failure is a validation failure, which has already been
+ * checked by the initial download.
+ */
+ (void)load_fabric_serdes_firmware(dd, &fw_fabric);
+ }
+
clear_sbus_fast_mode(dd);
- release_hw_mutex(dd);
+ release_chip_resource(dd, CR_SBUS);
}
/* Access to the SBus in this routine should probably be serialized */
@@ -1120,6 +1172,9 @@ int sbus_request_slow(struct hfi1_devdata *dd,
{
u64 reg, count = 0;
+ /* make sure fast mode is clear */
+ clear_sbus_fast_mode(dd);
+
sbus_request(dd, receiver_addr, data_addr, command, data_in);
write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
ASIC_CFG_SBUS_EXECUTE_EXECUTE_SMASK);
@@ -1177,7 +1232,7 @@ static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
/* step 5: download SerDes machine code */
for (i = 0; i < fdet->firmware_len; i += 4) {
sbus_request(dd, ra, 0x0a, WRITE_SBUS_RECEIVER,
- *(u32 *)&fdet->firmware_ptr[i]);
+ *(u32 *)&fdet->firmware_ptr[i]);
}
/* step 6: IMEM override off */
sbus_request(dd, ra, 0x00, WRITE_SBUS_RECEIVER, 0x00000000);
@@ -1216,7 +1271,7 @@ static int load_sbus_firmware(struct hfi1_devdata *dd,
/* step 5: download the SBus Master machine code */
for (i = 0; i < fdet->firmware_len; i += 4) {
sbus_request(dd, ra, 0x14, WRITE_SBUS_RECEIVER,
- *(u32 *)&fdet->firmware_ptr[i]);
+ *(u32 *)&fdet->firmware_ptr[i]);
}
/* step 6: set IMEM_CNTL_EN off */
sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000040);
@@ -1249,19 +1304,23 @@ static int load_pcie_serdes_firmware(struct hfi1_devdata *dd,
/* step 3: enable XDMEM access */
sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000d40);
/* step 4: load firmware into SBus Master XDMEM */
- /* NOTE: the dmem address, write_en, and wdata are all pre-packed,
- we only need to pick up the bytes and write them */
+ /*
+ * NOTE: the dmem address, write_en, and wdata are all pre-packed,
+ * we only need to pick up the bytes and write them
+ */
for (i = 0; i < fdet->firmware_len; i += 4) {
sbus_request(dd, ra, 0x04, WRITE_SBUS_RECEIVER,
- *(u32 *)&fdet->firmware_ptr[i]);
+ *(u32 *)&fdet->firmware_ptr[i]);
}
/* step 5: disable XDMEM access */
sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000140);
/* step 6: allow SBus Spico to run */
sbus_request(dd, ra, 0x05, WRITE_SBUS_RECEIVER, 0x00000000);
- /* steps 7-11: run RSA, if it succeeds, firmware is available to
- be swapped */
+ /*
+ * steps 7-11: run RSA, if it succeeds, firmware is available to
+ * be swapped
+ */
return run_rsa(dd, "PCIe serdes", fdet->signature);
}
@@ -1285,7 +1344,7 @@ static void set_serdes_broadcast(struct hfi1_devdata *dd, u8 bg1, u8 bg2,
* 23:16 BROADCAST_GROUP_2 (default 0xff)
*/
sbus_request(dd, addrs[count], 0xfd, WRITE_SBUS_RECEIVER,
- (u32)bg1 << 4 | (u32)bg2 << 16);
+ (u32)bg1 << 4 | (u32)bg2 << 16);
}
}
@@ -1310,8 +1369,8 @@ retry:
/* timed out */
dd_dev_err(dd,
- "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
- (u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
+ "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
+ (u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
if (try == 0) {
/* break mutex and retry */
@@ -1328,10 +1387,197 @@ void release_hw_mutex(struct hfi1_devdata *dd)
write_csr(dd, ASIC_CFG_MUTEX, 0);
}
+/* return the given resource bit(s) as a mask for the given HFI */
+static inline u64 resource_mask(u32 hfi1_id, u32 resource)
+{
+ return ((u64)resource) << (hfi1_id ? CR_DYN_SHIFT : 0);
+}
+
+static void fail_mutex_acquire_message(struct hfi1_devdata *dd,
+ const char *func)
+{
+ dd_dev_err(dd,
+ "%s: hardware mutex stuck - suggest rebooting the machine\n",
+ func);
+}
+
+/*
+ * Acquire access to a chip resource.
+ *
+ * Return 0 on success, -EBUSY if resource busy, -EIO if mutex acquire failed.
+ */
+static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+ u64 scratch0, all_bits, my_bit;
+ int ret;
+
+ if (resource & CR_DYN_MASK) {
+ /* a dynamic resource is in use if either HFI has set the bit */
+ all_bits = resource_mask(0, resource) |
+ resource_mask(1, resource);
+ my_bit = resource_mask(dd->hfi1_id, resource);
+ } else {
+ /* non-dynamic resources are not split between HFIs */
+ all_bits = resource;
+ my_bit = resource;
+ }
+
+ /* lock against other callers within the driver wanting a resource */
+ mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+ ret = acquire_hw_mutex(dd);
+ if (ret) {
+ fail_mutex_acquire_message(dd, __func__);
+ ret = -EIO;
+ goto done;
+ }
+
+ scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+ if (scratch0 & all_bits) {
+ ret = -EBUSY;
+ } else {
+ write_csr(dd, ASIC_CFG_SCRATCH, scratch0 | my_bit);
+ /* force write to be visible to other HFI on another OS */
+ (void)read_csr(dd, ASIC_CFG_SCRATCH);
+ }
+
+ release_hw_mutex(dd);
+
+done:
+ mutex_unlock(&dd->asic_data->asic_resource_mutex);
+ return ret;
+}
+
+/*
+ * Acquire access to a chip resource, wait up to mswait milliseconds for
+ * the resource to become available.
+ *
+ * Return 0 on success, -EBUSY if busy (even after wait), -EIO if mutex
+ * acquire failed.
+ */
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait)
+{
+ unsigned long timeout;
+ int ret;
+
+ timeout = jiffies + msecs_to_jiffies(mswait);
+ while (1) {
+ ret = __acquire_chip_resource(dd, resource);
+ if (ret != -EBUSY)
+ return ret;
+ /* resource is busy, check our timeout */
+ if (time_after_eq(jiffies, timeout))
+ return -EBUSY;
+ usleep_range(80, 120); /* arbitrary delay */
+ }
+}
+
+/*
+ * Release access to a chip resource
+ */
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+ u64 scratch0, bit;
+
+ /* only dynamic resources should ever be cleared */
+ if (!(resource & CR_DYN_MASK)) {
+ dd_dev_err(dd, "%s: invalid resource 0x%x\n", __func__,
+ resource);
+ return;
+ }
+ bit = resource_mask(dd->hfi1_id, resource);
+
+ /* lock against other callers within the driver wanting a resource */
+ mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+ if (acquire_hw_mutex(dd)) {
+ fail_mutex_acquire_message(dd, __func__);
+ goto done;
+ }
+
+ scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+ if ((scratch0 & bit) != 0) {
+ scratch0 &= ~bit;
+ write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+ /* force write to be visible to other HFI on another OS */
+ (void)read_csr(dd, ASIC_CFG_SCRATCH);
+ } else {
+ dd_dev_warn(dd, "%s: id %d, resource 0x%x: bit not set\n",
+ __func__, dd->hfi1_id, resource);
+ }
+
+ release_hw_mutex(dd);
+
+done:
+ mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+/*
+ * Return true if resource is set, false otherwise. Print a warning
+ * if not set and a function is supplied.
+ */
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+ const char *func)
+{
+ u64 scratch0, bit;
+
+ if (resource & CR_DYN_MASK)
+ bit = resource_mask(dd->hfi1_id, resource);
+ else
+ bit = resource;
+
+ scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+ if ((scratch0 & bit) == 0) {
+ if (func)
+ dd_dev_warn(dd,
+ "%s: id %d, resource 0x%x, not acquired!\n",
+ func, dd->hfi1_id, resource);
+ return false;
+ }
+ return true;
+}
+
+static void clear_chip_resources(struct hfi1_devdata *dd, const char *func)
+{
+ u64 scratch0;
+
+ /* lock against other callers within the driver wanting a resource */
+ mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+ if (acquire_hw_mutex(dd)) {
+ fail_mutex_acquire_message(dd, func);
+ goto done;
+ }
+
+ /* clear all dynamic access bits for this HFI */
+ scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+ scratch0 &= ~resource_mask(dd->hfi1_id, CR_DYN_MASK);
+ write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+ /* force write to be visible to other HFI on another OS */
+ (void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+ release_hw_mutex(dd);
+
+done:
+ mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+void init_chip_resources(struct hfi1_devdata *dd)
+{
+ /* clear any holds left by us */
+ clear_chip_resources(dd, __func__);
+}
+
+void finish_chip_resources(struct hfi1_devdata *dd)
+{
+ /* clear any holds left by us */
+ clear_chip_resources(dd, __func__);
+}
+
void set_sbus_fast_mode(struct hfi1_devdata *dd)
{
write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
- ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
+ ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
}
void clear_sbus_fast_mode(struct hfi1_devdata *dd)
@@ -1354,23 +1600,23 @@ int load_firmware(struct hfi1_devdata *dd)
int ret;
if (fw_fabric_serdes_load) {
- ret = acquire_hw_mutex(dd);
+ ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
if (ret)
return ret;
set_sbus_fast_mode(dd);
set_serdes_broadcast(dd, all_fabric_serdes_broadcast,
- fabric_serdes_broadcast[dd->hfi1_id],
- fabric_serdes_addrs[dd->hfi1_id],
- NUM_FABRIC_SERDES);
+ fabric_serdes_broadcast[dd->hfi1_id],
+ fabric_serdes_addrs[dd->hfi1_id],
+ NUM_FABRIC_SERDES);
turn_off_spicos(dd, SPICO_FABRIC);
do {
ret = load_fabric_serdes_firmware(dd, &fw_fabric);
} while (retry_firmware(dd, ret));
clear_sbus_fast_mode(dd);
- release_hw_mutex(dd);
+ release_chip_resource(dd, CR_SBUS);
if (ret)
return ret;
}
@@ -1419,18 +1665,57 @@ int hfi1_firmware_init(struct hfi1_devdata *dd)
return obtain_firmware(dd);
}
+/*
+ * This function is a helper function for parse_platform_config(...) and
+ * does not check for validity of the platform configuration cache
+ * (because we know it is invalid as we are building up the cache).
+ * As such, this should not be called from anywhere other than
+ * parse_platform_config
+ */
+static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table)
+{
+ u32 meta_ver, meta_ver_meta, ver_start, ver_len, mask;
+ struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
+
+ if (!system_table)
+ return -EINVAL;
+
+ meta_ver_meta =
+ *(pcfgcache->config_tables[PLATFORM_CONFIG_SYSTEM_TABLE].table_metadata
+ + SYSTEM_TABLE_META_VERSION);
+
+ mask = ((1 << METADATA_TABLE_FIELD_START_LEN_BITS) - 1);
+ ver_start = meta_ver_meta & mask;
+
+ meta_ver_meta >>= METADATA_TABLE_FIELD_LEN_SHIFT;
+
+ mask = ((1 << METADATA_TABLE_FIELD_LEN_LEN_BITS) - 1);
+ ver_len = meta_ver_meta & mask;
+
+ ver_start /= 8;
+ meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1);
+
+ if (meta_ver < 5) {
+ dd_dev_info(
+ dd, "%s:Please update platform config\n", __func__);
+ return -EINVAL;
+ }
+ return 0;
+}
+
int parse_platform_config(struct hfi1_devdata *dd)
{
struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
u32 *ptr = NULL;
- u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0;
+ u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0;
u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
+ int ret = -EINVAL; /* assume failure */
- if (platform_config == NULL) {
+ if (!dd->platform_config.data) {
dd_dev_info(dd, "%s: Missing config file\n", __func__);
goto bail;
}
- ptr = (u32 *)platform_config->data;
+ ptr = (u32 *)dd->platform_config.data;
magic_num = *ptr;
ptr++;
@@ -1439,12 +1724,32 @@ int parse_platform_config(struct hfi1_devdata *dd)
goto bail;
}
- while (ptr < (u32 *)(platform_config->data + platform_config->size)) {
+ /* Field is file size in DWORDs */
+ file_length = (*ptr) * 4;
+ ptr++;
+
+ if (file_length > dd->platform_config.size) {
+ dd_dev_info(dd, "%s:File claims to be larger than read size\n",
+ __func__);
+ goto bail;
+ } else if (file_length < dd->platform_config.size) {
+ dd_dev_info(dd,
+ "%s:File claims to be smaller than read size, continuing\n",
+ __func__);
+ }
+ /* exactly equal, perfection */
+
+ /*
+ * In both cases where we proceed, using the self-reported file length
+ * is the safer option
+ */
+ while (ptr < (u32 *)(dd->platform_config.data + file_length)) {
header1 = *ptr;
header2 = *(ptr + 1);
if (header1 != ~header2) {
dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
- __func__, (ptr - (u32 *)platform_config->data));
+ __func__, (ptr - (u32 *)
+ dd->platform_config.data));
goto bail;
}
@@ -1467,6 +1772,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
case PLATFORM_CONFIG_SYSTEM_TABLE:
pcfgcache->config_tables[table_type].num_table =
1;
+ ret = check_meta_version(dd, ptr);
+ if (ret)
+ goto bail;
break;
case PLATFORM_CONFIG_PORT_TABLE:
pcfgcache->config_tables[table_type].num_table =
@@ -1484,9 +1792,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
break;
default:
dd_dev_info(dd,
- "%s: Unknown data table %d, offset %ld\n",
- __func__, table_type,
- (ptr - (u32 *)platform_config->data));
+ "%s: Unknown data table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr - (u32 *)
+ dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1507,9 +1816,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
break;
default:
dd_dev_info(dd,
- "%s: Unknown metadata table %d, offset %ld\n",
- __func__, table_type,
- (ptr - (u32 *)platform_config->data));
+ "%s: Unknown meta table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr -
+ (u32 *)dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@@ -1518,14 +1828,16 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* Calculate and check table crc */
crc = crc32_le(~(u32)0, (unsigned char const *)ptr,
- (table_length_dwords * 4));
+ (table_length_dwords * 4));
crc ^= ~(u32)0;
/* Jump the table */
ptr += table_length_dwords;
if (crc != *ptr) {
dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
- __func__, (ptr - (u32 *)platform_config->data));
+ __func__, (ptr -
+ (u32 *)
+ dd->platform_config.data));
goto bail;
}
/* Jump the CRC DWORD */
@@ -1536,11 +1848,12 @@ int parse_platform_config(struct hfi1_devdata *dd)
return 0;
bail:
memset(pcfgcache, 0, sizeof(struct platform_config_cache));
- return -EINVAL;
+ return ret;
}
static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
- int field, u32 *field_len_bits, u32 *field_start_bits)
+ int field, u32 *field_len_bits,
+ u32 *field_start_bits)
{
struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
u32 *src_ptr = NULL;
@@ -1600,8 +1913,9 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
* @len: length of memory pointed by @data in bytes.
*/
int get_platform_config_field(struct hfi1_devdata *dd,
- enum platform_config_table_type_encoding table_type,
- int table_index, int field_index, u32 *data, u32 len)
+ enum platform_config_table_type_encoding
+ table_type, int table_index, int field_index,
+ u32 *data, u32 len)
{
int ret = 0, wlen = 0, seek = 0;
u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL;
@@ -1613,7 +1927,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
return -EINVAL;
ret = get_platform_fw_field_metadata(dd, table_type, field_index,
- &field_len_bits, &field_start_bits);
+ &field_len_bits,
+ &field_start_bits);
if (ret)
return -EINVAL;
@@ -1629,19 +1944,21 @@ int get_platform_config_field(struct hfi1_devdata *dd,
if (len < field_len_bits)
return -EINVAL;
- seek = field_start_bits/8;
- wlen = field_len_bits/8;
+ seek = field_start_bits / 8;
+ wlen = field_len_bits / 8;
src_ptr = (u32 *)((u8 *)src_ptr + seek);
- /* We expect the field to be byte aligned and whole byte
- * lengths if we are here */
+ /*
+ * We expect the field to be byte aligned and whole byte
+ * lengths if we are here
+ */
memcpy(data, src_ptr, wlen);
return 0;
}
break;
case PLATFORM_CONFIG_PORT_TABLE:
- /* Port table is 4 DWORDS in META_VERSION 0 */
+ /* Port table is 4 DWORDS */
src_ptr = dd->hfi1_id ?
pcfgcache->config_tables[table_type].table + 4 :
pcfgcache->config_tables[table_type].table;
@@ -1669,7 +1986,7 @@ int get_platform_config_field(struct hfi1_devdata *dd,
if (!src_ptr || len < field_len_bits)
return -EINVAL;
- src_ptr += (field_start_bits/32);
+ src_ptr += (field_start_bits / 32);
*data = (*src_ptr >> (field_start_bits % 32)) &
((1 << field_len_bits) - 1);
@@ -1680,7 +1997,7 @@ int get_platform_config_field(struct hfi1_devdata *dd,
* Download the firmware needed for the Gen3 PCIe SerDes. An update
* to the SBus firmware is needed before updating the PCIe firmware.
*
- * Note: caller must be holding the HW mutex.
+ * Note: caller must be holding the SBus resource.
*/
int load_pcie_firmware(struct hfi1_devdata *dd)
{
@@ -1701,9 +2018,9 @@ int load_pcie_firmware(struct hfi1_devdata *dd)
if (fw_pcie_serdes_load) {
dd_dev_info(dd, "Setting PCIe SerDes broadcast\n");
set_serdes_broadcast(dd, all_pcie_serdes_broadcast,
- pcie_serdes_broadcast[dd->hfi1_id],
- pcie_serdes_addrs[dd->hfi1_id],
- NUM_PCIE_SERDES);
+ pcie_serdes_broadcast[dd->hfi1_id],
+ pcie_serdes_addrs[dd->hfi1_id],
+ NUM_PCIE_SERDES);
do {
ret = load_pcie_serdes_firmware(dd, &fw_pcie);
} while (retry_firmware(dd, ret));
@@ -1724,9 +2041,9 @@ void read_guid(struct hfi1_devdata *dd)
{
/* Take the DC out of reset to get a valid GUID value */
write_csr(dd, CCE_DC_CTRL, 0);
- (void) read_csr(dd, CCE_DC_CTRL);
+ (void)read_csr(dd, CCE_DC_CTRL);
dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID);
dd_dev_info(dd, "GUID %llx",
- (unsigned long long)dd->base_guid);
+ (unsigned long long)dd->base_guid);
}
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index d4826a9ab8d3..16cbdc4073e0 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h
@@ -1,14 +1,13 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -65,6 +62,7 @@
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
#include "chip_registers.h"
#include "common.h"
@@ -73,7 +71,8 @@
#include "chip.h"
#include "mad.h"
#include "qsfp.h"
-#include "platform_config.h"
+#include "platform.h"
+#include "affinity.h"
/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U
@@ -98,6 +97,8 @@ extern unsigned long hfi1_cap_mask;
#define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap))
#define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
HFI1_CAP_MISC_MASK)
+/* Offline Disabled Reason is 4-bits */
+#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)
/*
* Control context is always 0 and handles the error packets.
@@ -177,6 +178,11 @@ struct ctxt_eager_bufs {
} *rcvtids;
};
+struct exp_tid_set {
+ struct list_head list;
+ u32 count;
+};
+
struct hfi1_ctxtdata {
/* shadow the ctxt's RcvCtrl register */
u64 rcvctrl;
@@ -233,20 +239,13 @@ struct hfi1_ctxtdata {
u32 expected_count;
/* index of first expected TID entry. */
u32 expected_base;
- /* cursor into the exp group sets */
- atomic_t tidcursor;
- /* number of exp TID groups assigned to the ctxt */
- u16 numtidgroups;
- /* size of exp TID group fields in tidusemap */
- u16 tidmapcnt;
- /* exp TID group usage bitfield array */
- unsigned long *tidusemap;
- /* pinned pages for exp sends, allocated at open */
- struct page **tid_pg_list;
- /* dma handles for exp tid pages */
- dma_addr_t *physshadow;
+
+ struct exp_tid_set tid_group_list;
+ struct exp_tid_set tid_used_list;
+ struct exp_tid_set tid_full_list;
+
/* lock protecting all Expected TID data */
- spinlock_t exp_lock;
+ struct mutex exp_lock;
/* number of pio bufs for this ctxt (all procs, if shared) */
u32 piocnt;
/* first pio buffer for this ctxt */
@@ -311,8 +310,24 @@ struct hfi1_ctxtdata {
*/
struct task_struct *progress;
struct list_head sdma_queues;
+ /* protect sdma queues */
spinlock_t sdma_qlock;
+ /* Is ASPM interrupt supported for this context */
+ bool aspm_intr_supported;
+ /* ASPM state (enabled/disabled) for this context */
+ bool aspm_enabled;
+ /* Timer for re-enabling ASPM if interrupt activity quietens down */
+ struct timer_list aspm_timer;
+ /* Lock to serialize between intr, timer intr and user threads */
+ spinlock_t aspm_lock;
+ /* Is ASPM processing enabled for this context (in intr context) */
+ bool aspm_intr_enable;
+ /* Last interrupt timestamp */
+ ktime_t aspm_ts_last_intr;
+ /* Last timestamp at which we scheduled a timer for this context */
+ ktime_t aspm_ts_timer_sched;
+
/*
* The interrupt handler for a particular receive context can vary
* throughout it's lifetime. This is not a lock protected data member so
@@ -335,7 +350,7 @@ struct hfi1_packet {
void *hdr;
struct hfi1_ctxtdata *rcd;
__le32 *rhf_addr;
- struct hfi1_qp *qp;
+ struct rvt_qp *qp;
struct hfi1_other_headers *ohdr;
u64 rhf;
u32 maxcnt;
@@ -363,6 +378,7 @@ struct hfi1_snoop_data {
int mode_flag;
struct cdev cdev;
struct device *class_dev;
+ /* protect snoop data */
spinlock_t snoop_lock;
struct list_head queue;
wait_queue_head_t waitq;
@@ -375,7 +391,7 @@ struct hfi1_snoop_data {
#define HFI1_PORT_SNOOP_MODE 1U
#define HFI1_PORT_CAPTURE_MODE 2U
-struct hfi1_sge_state;
+struct rvt_sge_state;
/*
* Get/Set IB link-level config parameters for f_get/set_ib_cfg()
@@ -424,17 +440,17 @@ struct hfi1_sge_state;
#define __HLS_GOING_OFFLINE_BP 9
#define __HLS_LINK_COOLDOWN_BP 10
-#define HLS_UP_INIT (1 << __HLS_UP_INIT_BP)
-#define HLS_UP_ARMED (1 << __HLS_UP_ARMED_BP)
-#define HLS_UP_ACTIVE (1 << __HLS_UP_ACTIVE_BP)
-#define HLS_DN_DOWNDEF (1 << __HLS_DN_DOWNDEF_BP) /* link down default */
-#define HLS_DN_POLL (1 << __HLS_DN_POLL_BP)
-#define HLS_DN_DISABLE (1 << __HLS_DN_DISABLE_BP)
-#define HLS_DN_OFFLINE (1 << __HLS_DN_OFFLINE_BP)
-#define HLS_VERIFY_CAP (1 << __HLS_VERIFY_CAP_BP)
-#define HLS_GOING_UP (1 << __HLS_GOING_UP_BP)
-#define HLS_GOING_OFFLINE (1 << __HLS_GOING_OFFLINE_BP)
-#define HLS_LINK_COOLDOWN (1 << __HLS_LINK_COOLDOWN_BP)
+#define HLS_UP_INIT BIT(__HLS_UP_INIT_BP)
+#define HLS_UP_ARMED BIT(__HLS_UP_ARMED_BP)
+#define HLS_UP_ACTIVE BIT(__HLS_UP_ACTIVE_BP)
+#define HLS_DN_DOWNDEF BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
+#define HLS_DN_POLL BIT(__HLS_DN_POLL_BP)
+#define HLS_DN_DISABLE BIT(__HLS_DN_DISABLE_BP)
+#define HLS_DN_OFFLINE BIT(__HLS_DN_OFFLINE_BP)
+#define HLS_VERIFY_CAP BIT(__HLS_VERIFY_CAP_BP)
+#define HLS_GOING_UP BIT(__HLS_GOING_UP_BP)
+#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
+#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
@@ -490,6 +506,7 @@ struct hfi1_sge_state;
#define CNTR_DISABLED 0x2 /* Disable this counter */
#define CNTR_32BIT 0x4 /* Simulate 64 bits for this counter */
#define CNTR_VL 0x8 /* Per VL counter */
+#define CNTR_SDMA 0x10
#define CNTR_INVALID_VL -1 /* Specifies invalid VL */
#define CNTR_MODE_W 0x0
#define CNTR_MODE_R 0x1
@@ -512,10 +529,11 @@ static inline void incr_cntr32(u32 *cntr)
#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
+ enum irq_type type;
struct msix_entry msix;
void *arg;
char name[MAX_NAME_SIZE];
- cpumask_var_t mask;
+ cpumask_t mask;
};
/* per-SL CCA information */
@@ -542,6 +560,7 @@ enum {
};
struct vl_arb_cache {
+ /* protect vl arb cache */
spinlock_t lock;
struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
};
@@ -561,7 +580,8 @@ struct hfi1_pportdata {
struct kobject sl2sc_kobj;
struct kobject vl2mtu_kobj;
- /* QSFP support */
+ /* PHY support */
+ u32 port_type;
struct qsfp_data qsfp_info;
/* GUID for this interface, in host order */
@@ -586,6 +606,7 @@ struct hfi1_pportdata {
struct work_struct link_vc_work;
struct work_struct link_up_work;
struct work_struct link_down_work;
+ struct work_struct dc_host_req_work;
struct work_struct sma_message_work;
struct work_struct freeze_work;
struct work_struct link_downgrade_work;
@@ -623,6 +644,7 @@ struct hfi1_pportdata {
u16 link_speed_active;
u8 vls_supported;
u8 vls_operational;
+ u8 actual_vls_operational;
/* LID mask control */
u8 lmc;
/* Rx Polarity inversion (compensate for ~tx on partner) */
@@ -642,19 +664,23 @@ struct hfi1_pportdata {
u8 link_enabled; /* link enabled? */
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
+ u8 last_pstate; /* info only */
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
u8 phy_error_threshold;
- /* used to override LED behavior */
- u8 led_override; /* Substituted for normal value, if non-zero */
- u16 led_override_timeoff; /* delta to next timer event */
- u8 led_override_vals[2]; /* Alternates per blink-frame */
- u8 led_override_phase; /* Just counts, LSB picks from vals[] */
+ /* Used to override LED behavior for things like maintenance beaconing*/
+ /*
+ * Alternates per phase of blink
+ * [0] holds LED off duration, [1] holds LED on duration
+ */
+ unsigned long led_override_vals[2];
+ u8 led_override_phase; /* LSB picks from vals[] */
atomic_t led_override_timer_active;
/* Used to flash LEDs in override mode */
struct timer_list led_override_timer;
+
u32 sm_trap_qp;
u32 sa_qp;
@@ -689,10 +715,12 @@ struct hfi1_pportdata {
/* CA's max number of 64 entry units in the congestion control table */
u8 cc_max_table_entries;
- /* begin congestion log related entries
- * cc_log_lock protects all congestion log related data */
+ /*
+ * begin congestion log related entries
+ * cc_log_lock protects all congestion log related data
+ */
spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
- u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+ u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
u16 threshold_event_counter;
struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
int cc_log_idx; /* index for logging events */
@@ -705,8 +733,9 @@ struct hfi1_pportdata {
u64 *cntrs;
/* port relative synthetic counter buffer */
u64 *scntrs;
- /* we synthesize port_xmit_discards from several egress errors */
+ /* port_xmit_discards are synthesized from different egress errors */
u64 port_xmit_discards;
+ u64 port_xmit_discards_vl[C_VL_COUNT];
u64 port_xmit_constraint_errors;
u64 port_rcv_constraint_errors;
/* count of 'link_err' interrupts from DC */
@@ -728,6 +757,9 @@ struct hfi1_pportdata {
u8 remote_link_down_reason;
/* Error events that will cause a port bounce. */
u32 port_error_action;
+ struct work_struct linkstate_active_work;
+ /* Does this port need to prescan for FECNs */
+ bool cc_prescan;
};
typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
@@ -773,6 +805,12 @@ struct hfi1_temp {
u8 triggers; /* temperature triggers */
};
+/* common data between shared ASIC HFIs */
+struct hfi1_asic_data {
+ struct hfi1_devdata *dds[2]; /* back pointers */
+ struct mutex asic_resource_mutex;
+};
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -782,6 +820,7 @@ struct sdma_vl_map;
#define BOARD_VERS_MAX 96 /* how long the version string can be */
#define SERIAL_MAX 16 /* length of the serial number */
+typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
struct hfi1_devdata {
struct hfi1_ibdev verbs_dev; /* must be first */
struct list_head list;
@@ -811,6 +850,12 @@ struct hfi1_devdata {
spinlock_t sc_lock;
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
+ /* lock for pio_map */
+ spinlock_t pio_map_lock;
+ /* array of kernel send contexts */
+ struct send_context **kernel_send_context;
+ /* array of vl maps */
+ struct pio_vl_map __rcu *pio_map;
/* seqlock for sc2vl */
seqlock_t sc2vl_lock;
u64 sc2vl[4];
@@ -841,6 +886,8 @@ struct hfi1_devdata {
wait_queue_head_t sdma_unfreeze_wq;
atomic_t sdma_unfreeze_count;
+ /* common data between shared ASIC HFIs in this OS */
+ struct hfi1_asic_data *asic_data;
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
@@ -873,10 +920,11 @@ struct hfi1_devdata {
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
+ u64 z_send_schedule;
/* percpu int_counter */
u64 __percpu *int_counter;
u64 __percpu *rcv_limit;
-
+ u64 __percpu *send_schedule;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
/* number of pio send contexts in use by the driver */
@@ -885,6 +933,8 @@ struct hfi1_devdata {
* number of ctxts available for PSM open
*/
u32 freectxts;
+ /* total number of available user/PSM contexts */
+ u32 num_user_contexts;
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
@@ -996,9 +1046,8 @@ struct hfi1_devdata {
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
+ struct platform_config platform_config;
struct platform_config_cache pcfg_cache;
- /* control high-level access to qsfp */
- struct mutex qsfp_i2c_mutex;
struct diag_client *diag_client;
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
@@ -1008,8 +1057,6 @@ struct hfi1_devdata {
u16 psxmitwait_check_rate;
/* high volume overflow errors deferred to tasklet */
struct tasklet_struct error_tasklet;
- /* per device cq worker */
- struct kthread_worker *worker;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1090,10 +1137,8 @@ struct hfi1_devdata {
* Handlers for outgoing data so that snoop/capture does not
* have to have its hooks in the send path
*/
- int (*process_pio_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
- u64 pbc);
- int (*process_dma_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
- u64 pbc);
+ send_routine process_pio_send;
+ send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
@@ -1105,7 +1150,6 @@ struct hfi1_devdata {
struct timer_list rcverr_timer;
u32 rcv_ovfl_cnt;
- int assigned_node_id;
wait_queue_head_t event_queue;
/* Save the enabled LCB error bits */
@@ -1115,6 +1159,16 @@ struct hfi1_devdata {
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
dma_addr_t rcvhdrtail_dummy_physaddr;
+
+ bool eprom_available; /* true if EPROM is available for this device */
+ bool aspm_supported; /* Does HW support ASPM */
+ bool aspm_enabled; /* ASPM state: enabled/disabled */
+ /* Serialize ASPM enable/disable between multiple verbs contexts */
+ spinlock_t aspm_lock;
+ /* Number of verbs contexts which have disabled ASPM */
+ atomic_t aspm_disabled_cnt;
+
+ struct hfi1_affinity *affinity;
};
/* 8051 firmware version helper */
@@ -1125,6 +1179,9 @@ struct hfi1_devdata {
#define PT_EAGER 1
#define PT_INVALID 2
+struct tid_rb_node;
+struct mmu_rb_node;
+
/* Private data for file operations */
struct hfi1_filedata {
struct hfi1_ctxtdata *uctxt;
@@ -1133,6 +1190,16 @@ struct hfi1_filedata {
struct hfi1_user_sdma_pkt_q *pq;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
+ u32 tid_n_pinned;
+ struct rb_root tid_rb_root;
+ struct tid_rb_node **entry_to_rb;
+ spinlock_t tid_lock; /* protect tid_[limit,used] counters */
+ u32 tid_limit;
+ u32 tid_used;
+ u32 *invalid_tids;
+ u32 invalid_tid_idx;
+ /* protect invalid_tids array and invalid_tid_idx */
+ spinlock_t invalid_lock;
};
extern struct list_head hfi1_dev_list;
@@ -1156,7 +1223,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
struct hfi1_devdata *, u8, u8);
void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
@@ -1164,6 +1231,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+void set_all_slowpath(struct hfi1_devdata *dd);
/* receive packet handler dispositions */
#define RCV_PKT_OK 0x0 /* keep going */
@@ -1184,6 +1252,15 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
return ppd->lstate; /* use the cached value */
}
+void receive_interrupt_work(struct work_struct *work);
+
+/* extract service channel from header and rhf */
+static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+{
+ return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
+ ((!!(rhf & RHF_DC_INFO_MASK)) << 4);
+}
+
static inline u16 generate_jkey(kuid_t uid)
{
return from_kuid(current_user_ns(), uid) & 0xffff;
@@ -1253,7 +1330,7 @@ static inline u32 egress_cycles(u32 len, u32 rate)
void set_link_ipg(struct hfi1_pportdata *ppd);
void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
u32 rqpn, u8 svc_type);
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh);
@@ -1424,6 +1501,7 @@ static inline int valid_ib_mtu(unsigned int mtu)
mtu == 1024 || mtu == 2048 ||
mtu == 4096;
}
+
static inline int valid_opa_max_mtu(unsigned int mtu)
{
return mtu >= 2048 &&
@@ -1445,12 +1523,13 @@ void reset_link_credits(struct hfi1_devdata *dd);
void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
{
@@ -1472,6 +1551,11 @@ static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp)
return container_of(ibp, struct hfi1_pportdata, ibport_data);
}
+static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
+{
+ return container_of(rdi, struct hfi1_ibdev, rdi);
+}
+
static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1515,12 +1599,10 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
#define HFI1_HAS_SDMA_TIMEOUT 0x8
#define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */
#define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */
-#define HFI1_DO_INIT_ASIC 0x100 /* This device will init the ASIC */
/* IB dword length mask in PBC (lower 11 bits); same for all chips */
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
-
/* ctxt_flag bit offsets */
/* context has been setup */
#define HFI1_CTXT_SETUP_DONE 1
@@ -1538,14 +1620,10 @@ void hfi1_free_devdata(struct hfi1_devdata *);
void cc_state_reclaim(struct rcu_head *rcu);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define HFI1_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define HFI1_LED_LOG 2 /* Logical (link) YELLOW LED */
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
+/* LED beaconing functions */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+ unsigned int timeoff);
+void shutdown_led_override(struct hfi1_pportdata *ppd);
#define HFI1_CREDIT_RETURN_RATE (100)
@@ -1587,12 +1665,13 @@ void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
*/
#define DEFAULT_RCVHDR_ENTSIZE 32
+bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32);
int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **);
-void hfi1_release_user_pages(struct page **, size_t, bool);
+void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool);
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
- *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL;
+ *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
}
static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -1601,7 +1680,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
* volatile because it's a DMA target from the chip, routine is
* inlined, and don't want register caching or reordering.
*/
- return (u32) le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+ return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
}
/*
@@ -1633,12 +1712,13 @@ void restore_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
- enum platform_config_table_type_encoding table_type,
- int table_index, int field_index, u32 *data, u32 len);
+ enum platform_config_table_type_encoding
+ table_type, int table_index, int field_index,
+ u32 *data, u32 len);
-dma_addr_t hfi1_map_page(struct pci_dev *, struct page *, unsigned long,
- size_t, int);
const char *get_unit_name(int unit);
+const char *get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
/*
* Flush write combining store buffers (if present) and perform a write
@@ -1659,7 +1739,7 @@ int process_receive_invalid(struct hfi1_packet *packet);
extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
-void update_sge(struct hfi1_sge_state *ss, u32 length);
+void update_sge(struct rvt_sge_state *ss, u32 length);
/* global module parameter variables */
extern unsigned int hfi1_max_mtu;
@@ -1667,7 +1747,7 @@ extern unsigned int hfi1_cu;
extern unsigned int user_credit_return_threshold;
extern int num_user_contexts;
extern unsigned n_krcvqs;
-extern u8 krcvqs[];
+extern uint krcvqs[];
extern int krcvqsset;
extern uint kdeth_qp;
extern uint loopback;
@@ -1829,13 +1909,14 @@ static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd)
dd->z_int_counter = get_all_cpu_total(dd->int_counter);
dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit);
+ dd->z_send_schedule = get_all_cpu_total(dd->send_schedule);
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
- ppd->ibport_data.z_rc_acks =
- get_all_cpu_total(ppd->ibport_data.rc_acks);
- ppd->ibport_data.z_rc_qacks =
- get_all_cpu_total(ppd->ibport_data.rc_qacks);
+ ppd->ibport_data.rvp.z_rc_acks =
+ get_all_cpu_total(ppd->ibport_data.rvp.rc_acks);
+ ppd->ibport_data.rvp.z_rc_qacks =
+ get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks);
}
}
@@ -1848,6 +1929,18 @@ static inline void setextled(struct hfi1_devdata *dd, u32 on)
write_csr(dd, DCC_CFG_LED_CNTRL, 0x10);
}
+/* return the i2c resource given the target */
+static inline u32 i2c_target(u32 target)
+{
+ return target ? CR_I2C2 : CR_I2C1;
+}
+
+/* return the i2c chain chip resource that this HFI uses for QSFP */
+static inline u32 qsfp_resource(struct hfi1_devdata *dd)
+{
+ return i2c_target(dd->hfi1_id);
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#endif /* _HFI1_KERNEL_H */
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 02df291eb172..cfcdc16b41c3 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -56,6 +53,7 @@
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
+#include <rdma/rdma_vt.h>
#include "hfi.h"
#include "device.h"
@@ -65,6 +63,7 @@
#include "sdma.h"
#include "debugfs.h"
#include "verbs.h"
+#include "aspm.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -75,6 +74,7 @@
#define HFI1_MIN_USER_CTXT_BUFCNT 7
#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
@@ -87,9 +87,9 @@ module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO);
MODULE_PARM_DESC(
num_user_contexts, "Set max number of user contexts to use");
-u8 krcvqs[RXE_NUM_DATA_VL];
+uint krcvqs[RXE_NUM_DATA_VL];
int krcvqsset;
-module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO);
+module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
/* computed based on above array */
@@ -128,16 +128,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
{
unsigned i;
int ret;
- int local_node_id = pcibus_to_node(dd->pcidev->bus);
/* Control context has to be always 0 */
BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
- if (local_node_id < 0)
- local_node_id = numa_node_id();
- dd->assigned_node_id = local_node_id;
-
- dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL);
+ dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
+ GFP_KERNEL, dd->node);
if (!dd->rcd)
goto nomem;
@@ -147,10 +143,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd;
ppd = dd->pport + (i % dd->num_pports);
- rcd = hfi1_create_ctxtdata(ppd, i);
+ rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
if (!rcd) {
dd_dev_err(dd,
- "Unable to allocate kernel receive context, failing\n");
+ "Unable to allocate kernel receive context, failing\n");
goto nomem;
}
/*
@@ -171,7 +167,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
if (!rcd->sc) {
dd_dev_err(dd,
- "Unable to allocate kernel send context, failing\n");
+ "Unable to allocate kernel send context, failing\n");
dd->rcd[rcd->ctxt] = NULL;
hfi1_free_ctxtdata(dd, rcd);
goto nomem;
@@ -189,6 +185,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
}
}
+ /*
+ * Initialize aspm, to be done after gen3 transition and setting up
+ * contexts and before enabling interrupts
+ */
+ aspm_init(dd);
+
return 0;
nomem:
ret = -ENOMEM;
@@ -201,7 +203,8 @@ bail:
/*
* Common code for user and kernel context setup.
*/
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+ int numa)
{
struct hfi1_devdata *dd = ppd->dd;
struct hfi1_ctxtdata *rcd;
@@ -224,10 +227,10 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
rcd->cnt = 1;
rcd->ctxt = ctxt;
dd->rcd[ctxt] = rcd;
- rcd->numa_id = numa_node_id();
+ rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
- spin_lock_init(&rcd->exp_lock);
+ mutex_init(&rcd->exp_lock);
/*
* Calculate the context's RcvArray entry starting point.
@@ -260,7 +263,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
/* Validate and initialize Rcv Hdr Q variables */
if (rcvhdrcnt % HDRQ_INCREMENT) {
dd_dev_err(dd,
- "ctxt%u: header queue count %d must be divisible by %d\n",
+ "ctxt%u: header queue count %d must be divisible by %lu\n",
rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
goto bail;
}
@@ -379,7 +382,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
cc_state = get_cc_state(ppd);
- if (cc_state == NULL)
+ if (!cc_state)
/*
* This should _never_ happen - rcu_read_lock() is held,
* and set_link_ipg() should not be called if cc_state
@@ -431,7 +434,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
cc_state = get_cc_state(ppd);
- if (cc_state == NULL) {
+ if (!cc_state) {
rcu_read_unlock();
return HRTIMER_NORESTART;
}
@@ -493,14 +496,19 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
INIT_WORK(&ppd->link_up_work, handle_link_up);
INIT_WORK(&ppd->link_down_work, handle_link_down);
+ INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
INIT_WORK(&ppd->freeze_work, handle_freeze);
INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
INIT_WORK(&ppd->sma_message_work, handle_sma_message);
INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+ INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
+ INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
+
mutex_init(&ppd->hls_lock);
spin_lock_init(&ppd->sdma_alllock);
spin_lock_init(&ppd->qsfp_info.qsfp_lock);
+ ppd->qsfp_info.ppd = ppd;
ppd->sm_trap_qp = 0x0;
ppd->sa_qp = 0x1;
@@ -582,8 +590,8 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
for (i = 0; i < dd->first_user_ctxt; ++i) {
+ rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR))
@@ -729,7 +737,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
lastfail = hfi1_setup_eagerbufs(rcd);
if (lastfail)
dd_dev_err(dd,
- "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+ "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
}
if (lastfail)
ret = lastfail;
@@ -762,7 +770,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
/* enable chip even if we have an error, so we can debug cause */
enable_chip(dd);
- ret = hfi1_cq_init(dd);
done:
/*
* Set status even if port serdes is not initialized
@@ -779,20 +786,15 @@ done:
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
- /* initialize the qsfp if it exists
- * Requires interrupts to be enabled so we are notified
- * when the QSFP completes reset, and has
- * to be done before bringing up the SERDES
+ /*
+ * start the serdes - must be after interrupts are
+ * enabled so we are notified when the link goes up
*/
- init_qsfp(ppd);
-
- /* start the serdes - must be after interrupts are
- enabled so we are notified when the link goes up */
lastfail = bringup_serdes(ppd);
if (lastfail)
dd_dev_info(dd,
- "Failed to bring up port %u\n",
- ppd->port);
+ "Failed to bring up port %u\n",
+ ppd->port);
/*
* Set status even if port serdes is not initialized
@@ -904,6 +906,8 @@ static void shutdown_device(struct hfi1_devdata *dd)
/* disable the send device */
pio_send_control(dd, PSC_GLOBAL_DISABLE);
+ shutdown_led_override(ppd);
+
/*
* Clear SerdesEnable.
* We can't count on interrupts since we are stopping.
@@ -961,17 +965,33 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.buffers);
sc_free(rcd->sc);
- vfree(rcd->physshadow);
- vfree(rcd->tid_pg_list);
vfree(rcd->user_event_mask);
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
- kfree(rcd->tidusemap);
kfree(rcd->opstats);
kfree(rcd);
}
+/*
+ * Release our hold on the shared asic data. If we are the last one,
+ * free the structure. Must be holding hfi1_devs_lock.
+ */
+static void release_asic_data(struct hfi1_devdata *dd)
+{
+ int other;
+
+ if (!dd->asic_data)
+ return;
+ dd->asic_data->dds[dd->hfi1_id] = NULL;
+ other = dd->hfi1_id ? 0 : 1;
+ if (!dd->asic_data->dds[other]) {
+ /* we are the last holder, free it */
+ kfree(dd->asic_data);
+ }
+ dd->asic_data = NULL;
+}
+
void hfi1_free_devdata(struct hfi1_devdata *dd)
{
unsigned long flags;
@@ -979,12 +999,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
spin_lock_irqsave(&hfi1_devs_lock, flags);
idr_remove(&hfi1_unit_table, dd->unit);
list_del(&dd->list);
+ release_asic_data(dd);
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+ free_platform_config(dd);
rcu_barrier(); /* wait for rcu callbacks to complete */
free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit);
- ib_dealloc_device(&dd->verbs_dev.ibdev);
+ hfi1_dev_affinity_free(dd);
+ free_percpu(dd->send_schedule);
+ ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
}
/*
@@ -999,19 +1022,19 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
{
unsigned long flags;
struct hfi1_devdata *dd;
- int ret;
+ int ret, nports;
- dd = (struct hfi1_devdata *)ib_alloc_device(sizeof(*dd) + extra);
+ /* extra is * number of ports */
+ nports = extra / sizeof(struct hfi1_pportdata);
+
+ dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+ nports);
if (!dd)
return ERR_PTR(-ENOMEM);
- /* extra is * number of ports */
- dd->num_pports = extra / sizeof(struct hfi1_pportdata);
+ dd->num_pports = nports;
dd->pport = (struct hfi1_pportdata *)(dd + 1);
INIT_LIST_HEAD(&dd->list);
- dd->node = dev_to_node(&pdev->dev);
- if (dd->node < 0)
- dd->node = 0;
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&hfi1_devs_lock, flags);
@@ -1041,9 +1064,9 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->sc_init_lock);
spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
- mutex_init(&dd->qsfp_i2c_mutex);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
+ spin_lock_init(&dd->pio_map_lock);
init_waitqueue_head(&dd->event_queue);
dd->int_counter = alloc_percpu(u64);
@@ -1062,6 +1085,14 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
goto bail;
}
+ dd->send_schedule = alloc_percpu(u64);
+ if (!dd->send_schedule) {
+ ret = -ENOMEM;
+ hfi1_early_err(&pdev->dev,
+ "Could not allocate per-cpu int_counter\n");
+ goto bail;
+ }
+
if (!hfi1_cpulist_count) {
u32 count = num_online_cpus();
@@ -1074,13 +1105,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
&pdev->dev,
"Could not alloc cpulist info, cpu affinity might be wrong\n");
}
- hfi1_dbg_ibdev_init(&dd->verbs_dev);
return dd;
bail:
if (!list_empty(&dd->list))
list_del_init(&dd->list);
- ib_dealloc_device(&dd->verbs_dev.ibdev);
+ ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
return ERR_PTR(ret);
}
@@ -1173,8 +1203,10 @@ static int __init hfi1_mod_init(void)
user_credit_return_threshold = 100;
compute_krcvqs();
- /* sanitize receive interrupt count, time must wait until after
- the hardware type is known */
+ /*
+ * sanitize receive interrupt count, time must wait until after
+ * the hardware type is known
+ */
if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
/* reject invalid combinations */
@@ -1209,6 +1241,9 @@ static int __init hfi1_mod_init(void)
idr_init(&hfi1_unit_table);
hfi1_dbg_init();
+ ret = hfi1_wss_init();
+ if (ret < 0)
+ goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@@ -1217,6 +1252,8 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */
bail_dev:
+ hfi1_wss_exit();
+bail_wss:
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
dev_cleanup();
@@ -1232,6 +1269,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
+ hfi1_wss_exit();
hfi1_dbg_exit();
hfi1_cpulist_count = 0;
kfree(hfi1_cpulist);
@@ -1303,16 +1341,18 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
}
}
kfree(tmp);
+ free_pio_map(dd);
/* must follow rcv context free - need to remove rcv's hooks */
for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
sc_free(dd->send_contexts[ctxt].sc);
dd->num_send_contexts = 0;
kfree(dd->send_contexts);
dd->send_contexts = NULL;
+ kfree(dd->hw_to_sw);
+ dd->hw_to_sw = NULL;
kfree(dd->boardname);
vfree(dd->events);
vfree(dd->status);
- hfi1_cq_exit(dd);
}
/*
@@ -1346,6 +1386,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = -EINVAL;
goto bail;
}
+ if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ hfi1_early_err(&pdev->dev,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ ret = -EINVAL;
+ goto bail;
+ }
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1422,8 +1469,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* we still create devices, so diags, etc. can be used
* to determine cause of problem.
*/
- if (!initfail && !ret)
+ if (!initfail && !ret) {
dd->flags |= HFI1_INITTED;
+ /* create debufs files after init and ib register */
+ hfi1_dbg_ibdev_init(&dd->verbs_dev);
+ }
j = hfi1_device_create(dd);
if (j)
@@ -1464,6 +1514,8 @@ static void remove_one(struct pci_dev *pdev)
{
struct hfi1_devdata *dd = pci_get_drvdata(pdev);
+ /* close debugfs files before ib unregister */
+ hfi1_dbg_ibdev_exit(&dd->verbs_dev);
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
@@ -1516,18 +1568,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
- "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
- amt, rcd->ctxt);
+ "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+ amt, rcd->ctxt);
goto bail;
}
- /* Event mask is per device now and is in hfi1_devdata */
- /*if (rcd->ctxt >= dd->first_user_ctxt) {
- rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
- if (!rcd->user_event_mask)
- goto bail_free_hdrq;
- }*/
-
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
@@ -1568,8 +1613,8 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
bail_free:
dd_dev_err(dd,
- "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
- rcd->ctxt);
+ "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+ rcd->ctxt);
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
@@ -1659,7 +1704,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
if (rcd->egrbufs.rcvtid_size == round_mtu ||
!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
- rcd->ctxt);
+ rcd->ctxt);
goto bail_rcvegrbuf_phys;
}
@@ -1694,8 +1739,9 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[j].len)) {
j++;
offset = 0;
- } else
+ } else {
offset += new_size;
+ }
}
rcd->egrbufs.rcvtid_size = new_size;
}
@@ -1708,7 +1754,6 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
rcd->egrbufs.size);
-
/*
* Set the contexts rcv array head update threshold to the closest
* power of 2 (so we can use a mask instead of modulo) below half
@@ -1742,14 +1787,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
- rcd->egrbufs.rcvtids[idx].phys, order);
+ rcd->egrbufs.rcvtids[idx].phys, order);
cond_resched();
}
goto bail;
bail_rcvegrbuf_phys:
for (idx = 0; idx < rcd->egrbufs.alloced &&
- rcd->egrbufs.buffers[idx].addr;
+ rcd->egrbufs.buffers[idx].addr;
idx++) {
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[idx].len,
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c
index 426582b9ab65..65348d16ab2f 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/staging/rdma/hfi1/intr.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -98,7 +95,7 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
*/
if (!(dd->flags & HFI1_INITTED))
return;
- event.device = &dd->verbs_dev.ibdev;
+ event.device = &dd->verbs_dev.rdi.ibdev;
event.element.port_num = ppd->port;
event.event = ev;
ib_dispatch_event(&event);
@@ -131,28 +128,26 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
* NOTE: This uses this device's vAU, vCU, and vl15_init for
* the remote values. Both sides must be using the values.
*/
- if (quick_linkup
- || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+ if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
set_up_vl15(dd, dd->vau, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
ppd->neighbor_guid =
- read_csr(dd,
- DC_DC8051_STS_REMOTE_GUID);
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
ppd->neighbor_type =
read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
ppd->neighbor_port_number =
read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd,
- "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
+ ppd->neighbor_guid,
+ ppd->neighbor_type);
}
/* physical link went up */
ppd->linkup = 1;
- ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/* link widths are not available until the link is fully up */
get_linkup_link_widths(ppd);
@@ -165,7 +160,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
reset_link_credits(dd);
/* freeze after a link down to guarantee a clean egress */
- start_freeze_handling(ppd, FREEZE_SELF|FREEZE_LINK_DOWN);
+ start_freeze_handling(ppd, FREEZE_SELF | FREEZE_LINK_DOWN);
ev = IB_EVENT_PORT_ERR;
@@ -177,8 +172,6 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
/* notify IB of the link change */
signal_ib_event(ppd, ev);
}
-
-
}
/*
diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h
index e8ba5606d08d..2ec6ef38d389 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/staging/rdma/hfi1/iowait.h
@@ -1,14 +1,13 @@
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -54,6 +51,8 @@
#include <linux/workqueue.h>
#include <linux/sched.h>
+#include "sdma_txreq.h"
+
/*
* typedef (*restart_t)() - restart callback
* @work: pointer to work structure
@@ -67,9 +66,11 @@ struct sdma_engine;
* @list: used to add/insert into QP/PQ wait lists
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
- * @wakeup: space callback
+ * @wakeup: space callback wakeup
+ * @sdma_drained: sdma count drained
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
+ * @wait_pio: wait for pio_busy == 0
* @sdma_busy: # of packets in flight
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
@@ -101,9 +102,12 @@ struct iowait {
struct sdma_txreq *tx,
unsigned seq);
void (*wakeup)(struct iowait *wait, int reason);
+ void (*sdma_drained)(struct iowait *wait);
struct work_struct iowork;
wait_queue_head_t wait_dma;
+ wait_queue_head_t wait_pio;
atomic_t sdma_busy;
+ atomic_t pio_busy;
u32 count;
u32 tx_limit;
u32 tx_count;
@@ -117,7 +121,7 @@ struct iowait {
* @tx_limit: limit for overflow queuing
* @func: restart function for workqueue
* @sleep: sleep function for no space
- * @wakeup: wakeup function for no space
+ * @resume: wakeup function for no space
*
* This function initializes the iowait
* structure embedded in the QP or PQ.
@@ -133,17 +137,21 @@ static inline void iowait_init(
struct iowait *wait,
struct sdma_txreq *tx,
unsigned seq),
- void (*wakeup)(struct iowait *wait, int reason))
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
init_waitqueue_head(&wait->wait_dma);
+ init_waitqueue_head(&wait->wait_pio);
atomic_set(&wait->sdma_busy, 0);
+ atomic_set(&wait->pio_busy, 0);
wait->tx_limit = tx_limit;
wait->sleep = sleep;
wait->wakeup = wakeup;
+ wait->sdma_drained = sdma_drained;
}
/**
@@ -174,6 +182,88 @@ static inline void iowait_sdma_drain(struct iowait *wait)
}
/**
+ * iowait_sdma_pending() - return sdma pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_sdma_pending(struct iowait *wait)
+{
+ return atomic_read(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_inc - note sdma io pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_inc(struct iowait *wait)
+{
+ atomic_inc(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_add - add count to pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_add(struct iowait *wait, int count)
+{
+ atomic_add(count, &wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_dec - note sdma complete
+ * @wait: iowait structure
+ */
+static inline int iowait_sdma_dec(struct iowait *wait)
+{
+ return atomic_dec_and_test(&wait->sdma_busy);
+}
+
+/**
+ * iowait_pio_drain() - wait for pios to drain
+ *
+ * @wait: iowait structure
+ *
+ * This will delay until the iowait pios have
+ * completed.
+ */
+static inline void iowait_pio_drain(struct iowait *wait)
+{
+ wait_event_timeout(wait->wait_pio,
+ !atomic_read(&wait->pio_busy),
+ HZ);
+}
+
+/**
+ * iowait_pio_pending() - return pio pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_pio_pending(struct iowait *wait)
+{
+ return atomic_read(&wait->pio_busy);
+}
+
+/**
+ * iowait_pio_inc - note pio pending
+ * @wait: iowait structure
+ */
+static inline void iowait_pio_inc(struct iowait *wait)
+{
+ atomic_inc(&wait->pio_busy);
+}
+
+/**
+ * iowait_sdma_dec - note pio complete
+ * @wait: iowait structure
+ */
+static inline int iowait_pio_dec(struct iowait *wait)
+{
+ return atomic_dec_and_test(&wait->pio_busy);
+}
+
+/**
* iowait_drain_wakeup() - trigger iowait_drain() waiter
*
* @wait: iowait structure
@@ -183,6 +273,28 @@ static inline void iowait_sdma_drain(struct iowait *wait)
static inline void iowait_drain_wakeup(struct iowait *wait)
{
wake_up(&wait->wait_dma);
+ wake_up(&wait->wait_pio);
+ if (wait->sdma_drained)
+ wait->sdma_drained(wait);
+}
+
+/**
+ * iowait_get_txhead() - get packet off of iowait list
+ *
+ * @wait wait struture
+ */
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+{
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&wait->tx_head)) {
+ tx = list_first_entry(
+ &wait->tx_head,
+ struct sdma_txreq,
+ list);
+ list_del_init(&tx->list);
+ }
+ return tx;
}
#endif
diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
deleted file mode 100644
index e34f093a6b55..000000000000
--- a/drivers/staging/rdma/hfi1/keys.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "hfi.h"
-
-/**
- * hfi1_alloc_lkey - allocate an lkey
- * @mr: memory region that this lkey protects
- * @dma_region: 0->normal key, 1->restricted DMA key
- *
- * Returns 0 if successful, otherwise returns -errno.
- *
- * Increments mr reference count as required.
- *
- * Sets the lkey field mr for non-dma regions.
- *
- */
-
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region)
-{
- unsigned long flags;
- u32 r;
- u32 n;
- int ret = 0;
- struct hfi1_ibdev *dev = to_idev(mr->pd->device);
- struct hfi1_lkey_table *rkt = &dev->lk_table;
-
- hfi1_get_mr(mr);
- spin_lock_irqsave(&rkt->lock, flags);
-
- /* special case for dma_mr lkey == 0 */
- if (dma_region) {
- struct hfi1_mregion *tmr;
-
- tmr = rcu_access_pointer(dev->dma_mr);
- if (!tmr) {
- rcu_assign_pointer(dev->dma_mr, mr);
- mr->lkey_published = 1;
- } else {
- hfi1_put_mr(mr);
- }
- goto success;
- }
-
- /* Find the next available LKEY */
- r = rkt->next;
- n = r;
- for (;;) {
- if (!rcu_access_pointer(rkt->table[r]))
- break;
- r = (r + 1) & (rkt->max - 1);
- if (r == n)
- goto bail;
- }
- rkt->next = (r + 1) & (rkt->max - 1);
- /*
- * Make sure lkey is never zero which is reserved to indicate an
- * unrestricted LKEY.
- */
- rkt->gen++;
- /*
- * bits are capped in verbs.c to ensure enough bits for
- * generation number
- */
- mr->lkey = (r << (32 - hfi1_lkey_table_size)) |
- ((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen)
- << 8);
- if (mr->lkey == 0) {
- mr->lkey = 1 << 8;
- rkt->gen++;
- }
- rcu_assign_pointer(rkt->table[r], mr);
- mr->lkey_published = 1;
-success:
- spin_unlock_irqrestore(&rkt->lock, flags);
-out:
- return ret;
-bail:
- hfi1_put_mr(mr);
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = -ENOMEM;
- goto out;
-}
-
-/**
- * hfi1_free_lkey - free an lkey
- * @mr: mr to free from tables
- */
-void hfi1_free_lkey(struct hfi1_mregion *mr)
-{
- unsigned long flags;
- u32 lkey = mr->lkey;
- u32 r;
- struct hfi1_ibdev *dev = to_idev(mr->pd->device);
- struct hfi1_lkey_table *rkt = &dev->lk_table;
- int freed = 0;
-
- spin_lock_irqsave(&rkt->lock, flags);
- if (!mr->lkey_published)
- goto out;
- if (lkey == 0)
- RCU_INIT_POINTER(dev->dma_mr, NULL);
- else {
- r = lkey >> (32 - hfi1_lkey_table_size);
- RCU_INIT_POINTER(rkt->table[r], NULL);
- }
- mr->lkey_published = 0;
- freed++;
-out:
- spin_unlock_irqrestore(&rkt->lock, flags);
- if (freed) {
- synchronize_rcu();
- hfi1_put_mr(mr);
- }
-}
-
-/**
- * hfi1_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
- struct hfi1_sge *isge, struct ib_sge *sge, int acc)
-{
- struct hfi1_mregion *mr;
- unsigned n, m;
- size_t off;
-
- /*
- * We use LKEY == zero for kernel virtual addresses
- * (see hfi1_get_dma_mr and dma.c).
- */
- rcu_read_lock();
- if (sge->lkey == 0) {
- struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
- if (pd->user)
- goto bail;
- mr = rcu_dereference(dev->dma_mr);
- if (!mr)
- goto bail;
- atomic_inc(&mr->refcount);
- rcu_read_unlock();
-
- isge->mr = mr;
- isge->vaddr = (void *) sge->addr;
- isge->length = sge->length;
- isge->sge_length = sge->length;
- isge->m = 0;
- isge->n = 0;
- goto ok;
- }
- mr = rcu_dereference(
- rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]);
- if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
- goto bail;
-
- off = sge->addr - mr->user_base;
- if (unlikely(sge->addr < mr->user_base ||
- off + sge->length > mr->length ||
- (mr->access_flags & acc) != acc))
- goto bail;
- atomic_inc(&mr->refcount);
- rcu_read_unlock();
-
- off += mr->offset;
- if (mr->page_shift) {
- /*
- page sizes are uniform power of 2 so no loop is necessary
- entries_spanned_by_off is the number of times the loop below
- would have executed.
- */
- size_t entries_spanned_by_off;
-
- entries_spanned_by_off = off >> mr->page_shift;
- off -= (entries_spanned_by_off << mr->page_shift);
- m = entries_spanned_by_off / HFI1_SEGSZ;
- n = entries_spanned_by_off % HFI1_SEGSZ;
- } else {
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
- }
- isge->mr = mr;
- isge->vaddr = mr->map[m]->segs[n].vaddr + off;
- isge->length = mr->map[m]->segs[n].length - off;
- isge->sge_length = sge->length;
- isge->m = m;
- isge->n = n;
-ok:
- return 1;
-bail:
- rcu_read_unlock();
- return 0;
-}
-
-/**
- * hfi1_rkey_ok - check the IB virtual address, length, and RKEY
- * @qp: qp for validation
- * @sge: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- *
- * increments the reference count upon success
- */
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
- u32 len, u64 vaddr, u32 rkey, int acc)
-{
- struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
- struct hfi1_mregion *mr;
- unsigned n, m;
- size_t off;
-
- /*
- * We use RKEY == zero for kernel virtual addresses
- * (see hfi1_get_dma_mr and dma.c).
- */
- rcu_read_lock();
- if (rkey == 0) {
- struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
- struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
- if (pd->user)
- goto bail;
- mr = rcu_dereference(dev->dma_mr);
- if (!mr)
- goto bail;
- atomic_inc(&mr->refcount);
- rcu_read_unlock();
-
- sge->mr = mr;
- sge->vaddr = (void *) vaddr;
- sge->length = len;
- sge->sge_length = len;
- sge->m = 0;
- sge->n = 0;
- goto ok;
- }
-
- mr = rcu_dereference(
- rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]);
- if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
- goto bail;
-
- off = vaddr - mr->iova;
- if (unlikely(vaddr < mr->iova || off + len > mr->length ||
- (mr->access_flags & acc) == 0))
- goto bail;
- atomic_inc(&mr->refcount);
- rcu_read_unlock();
-
- off += mr->offset;
- if (mr->page_shift) {
- /*
- page sizes are uniform power of 2 so no loop is necessary
- entries_spanned_by_off is the number of times the loop below
- would have executed.
- */
- size_t entries_spanned_by_off;
-
- entries_spanned_by_off = off >> mr->page_shift;
- off -= (entries_spanned_by_off << mr->page_shift);
- m = entries_spanned_by_off / HFI1_SEGSZ;
- n = entries_spanned_by_off % HFI1_SEGSZ;
- } else {
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
- }
- sge->mr = mr;
- sge->vaddr = mr->map[m]->segs[n].vaddr + off;
- sge->length = mr->map[m]->segs[n].length - off;
- sge->sge_length = len;
- sge->m = m;
- sge->n = n;
-ok:
- return 1;
-bail:
- rcu_read_unlock();
- return 0;
-}
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c
index 77700b818e3d..d1e7f4d7cf6f 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/staging/rdma/hfi1/mad.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -55,6 +52,7 @@
#include "hfi.h"
#include "mad.h"
#include "trace.h"
+#include "qp.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -91,7 +89,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
int pkey_idx;
u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
- agent = ibp->send_agent;
+ agent = ibp->rvp.send_agent;
if (!agent)
return;
@@ -100,7 +98,8 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
return;
/* o14-2 */
- if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+ if (ibp->rvp.trap_timeout && time_before(jiffies,
+ ibp->rvp.trap_timeout))
return;
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -121,42 +120,43 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
smp->class_version = OPA_SMI_CLASS_VERSION;
smp->method = IB_MGMT_METHOD_TRAP;
- ibp->tid++;
- smp->tid = cpu_to_be64(ibp->tid);
+ ibp->rvp.tid++;
+ smp->tid = cpu_to_be64(ibp->rvp.tid);
smp->attr_id = IB_SMP_ATTR_NOTICE;
/* o14-1: smp->mkey = 0; */
memcpy(smp->route.lid.data, data, len);
- spin_lock_irqsave(&ibp->lock, flags);
- if (!ibp->sm_ah) {
- if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ if (!ibp->rvp.sm_ah) {
+ if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
struct ib_ah *ah;
- ah = hfi1_create_qp0_ah(ibp, ibp->sm_lid);
- if (IS_ERR(ah))
+ ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
+ if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- else {
+ } else {
send_buf->ah = ah;
- ibp->sm_ah = to_iah(ah);
+ ibp->rvp.sm_ah = ibah_to_rvtah(ah);
ret = 0;
}
- } else
+ } else {
ret = -EINVAL;
+ }
} else {
- send_buf->ah = &ibp->sm_ah->ibah;
+ send_buf->ah = &ibp->rvp.sm_ah->ibah;
ret = 0;
}
- spin_unlock_irqrestore(&ibp->lock, flags);
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (!ret)
ret = ib_post_send_mad(send_buf, NULL);
if (!ret) {
/* 4.096 usec. */
- timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
- ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+ timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+ ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
} else {
ib_free_send_mad(send_buf);
- ibp->trap_timeout = 0;
+ ibp->rvp.trap_timeout = 0;
}
}
@@ -174,10 +174,10 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
memset(&data, 0, sizeof(data));
if (trap_num == OPA_TRAP_BAD_P_KEY)
- ibp->pkey_violations++;
+ ibp->rvp.pkey_violations++;
else
- ibp->qkey_violations++;
- ibp->n_pkt_drops++;
+ ibp->rvp.qkey_violations++;
+ ibp->rvp.n_pkt_drops++;
/* Send violation trap */
data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -233,9 +233,12 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/*
* Send a Port Capability Mask Changed trap (ch. 14.3.11).
*/
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
{
struct opa_mad_notice_attr data;
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+ struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
u32 lid = ppd_from_ibp(ibp)->lid;
memset(&data, 0, sizeof(data));
@@ -245,7 +248,7 @@ void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
data.issuer_lid = cpu_to_be32(lid);
data.ntc_144.lid = data.issuer_lid;
- data.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
+ data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
send_trap(ibp, &data, sizeof(data));
}
@@ -407,37 +410,38 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
int ret = 0;
/* Is the mkey in the process of expiring? */
- if (ibp->mkey_lease_timeout &&
- time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+ if (ibp->rvp.mkey_lease_timeout &&
+ time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
/* Clear timeout and mkey protection field. */
- ibp->mkey_lease_timeout = 0;
- ibp->mkeyprot = 0;
+ ibp->rvp.mkey_lease_timeout = 0;
+ ibp->rvp.mkeyprot = 0;
}
- if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 ||
- ibp->mkey == mkey)
+ if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 ||
+ ibp->rvp.mkey == mkey)
valid_mkey = 1;
/* Unset lease timeout on any valid Get/Set/TrapRepress */
- if (valid_mkey && ibp->mkey_lease_timeout &&
+ if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
(mad->method == IB_MGMT_METHOD_GET ||
mad->method == IB_MGMT_METHOD_SET ||
mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
- ibp->mkey_lease_timeout = 0;
+ ibp->rvp.mkey_lease_timeout = 0;
if (!valid_mkey) {
switch (mad->method) {
case IB_MGMT_METHOD_GET:
/* Bad mkey not a violation below level 2 */
- if (ibp->mkeyprot < 2)
+ if (ibp->rvp.mkeyprot < 2)
break;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
- if (ibp->mkey_violations != 0xFFFF)
- ++ibp->mkey_violations;
- if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
- ibp->mkey_lease_timeout = jiffies +
- ibp->mkey_lease_period * HZ;
+ if (ibp->rvp.mkey_violations != 0xFFFF)
+ ++ibp->rvp.mkey_violations;
+ if (!ibp->rvp.mkey_lease_timeout &&
+ ibp->rvp.mkey_lease_period)
+ ibp->rvp.mkey_lease_timeout = jiffies +
+ ibp->rvp.mkey_lease_period * HZ;
/* Generate a trap notice. */
bad_mkey(ibp, mad, mkey, dr_slid, return_path,
hop_cnt);
@@ -501,16 +505,6 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
}
-static u8 __opa_porttype(struct hfi1_pportdata *ppd)
-{
- if (qsfp_mod_present(ppd)) {
- if (ppd->qsfp_info.cache_valid)
- return OPA_PORT_TYPE_STANDARD;
- return OPA_PORT_TYPE_DISCONNECTED;
- }
- return OPA_PORT_TYPE_UNKNOWN;
-}
-
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -522,6 +516,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct opa_port_info *pi = (struct opa_port_info *)data;
u8 mtu;
u8 credit_rate;
+ u8 is_beaconing_active;
u32 state;
u32 num_ports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -538,8 +533,8 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd = dd->pport + (port - 1);
ibp = &ppd->ibport_data;
- if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
- ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+ if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+ ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -548,14 +543,14 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
/* Only return the mkey if the protection field allows it. */
if (!(smp->method == IB_MGMT_METHOD_GET &&
- ibp->mkey != smp->mkey &&
- ibp->mkeyprot == 1))
- pi->mkey = ibp->mkey;
-
- pi->subnet_prefix = ibp->gid_prefix;
- pi->sm_lid = cpu_to_be32(ibp->sm_lid);
- pi->ib_cap_mask = cpu_to_be32(ibp->port_cap_flags);
- pi->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+ ibp->rvp.mkey != smp->mkey &&
+ ibp->rvp.mkeyprot == 1))
+ pi->mkey = ibp->rvp.mkey;
+
+ pi->subnet_prefix = ibp->rvp.gid_prefix;
+ pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
+ pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+ pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
pi->sa_qp = cpu_to_be32(ppd->sa_qp);
@@ -581,38 +576,45 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
if (start_of_sm_config && (state == IB_PORT_INIT))
ppd->is_sm_config_started = 1;
- pi->port_phys_conf = __opa_porttype(ppd) & 0xf;
+ pi->port_phys_conf = (ppd->port_type & 0xf);
#if PI_LED_ENABLE_SUP
pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
pi->port_states.ledenable_offlinereason |=
ppd->is_sm_config_started << 5;
+ /*
+ * This pairs with the memory barrier in hfi1_start_led_override to
+ * ensure that we read the correct state of LED beaconing represented
+ * by led_override_timer_active
+ */
+ smp_rmb();
+ is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+ pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
pi->port_states.ledenable_offlinereason |=
- ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+ ppd->offline_disabled_reason;
#else
pi->port_states.offline_reason = ppd->neighbor_normal << 4;
pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
- pi->port_states.offline_reason |= ppd->offline_disabled_reason &
- OPA_PI_MASK_OFFLINE_REASON;
+ pi->port_states.offline_reason |= ppd->offline_disabled_reason;
#endif /* PI_LED_ENABLE_SUP */
pi->port_states.portphysstate_portstate =
(hfi1_ibphys_portstate(ppd) << 4) | state;
- pi->mkeyprotect_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+ pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
for (i = 0; i < ppd->vls_supported; i++) {
mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
if ((i % 2) == 0)
- pi->neigh_mtu.pvlx_to_mtu[i/2] |= (mtu << 4);
+ pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
else
- pi->neigh_mtu.pvlx_to_mtu[i/2] |= mtu;
+ pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
}
/* don't forget VL 15 */
mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
- pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu;
- pi->smsl = ibp->sm_sl & OPA_PI_MASK_SMSL;
+ pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
+ pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
pi->partenforce_filterraw |=
(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
@@ -620,17 +622,17 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
- pi->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+ pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
/* P_KeyViolations are counted by hardware. */
- pi->pkey_violations = cpu_to_be16(ibp->pkey_violations);
- pi->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+ pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+ pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
pi->vl.cap = ppd->vls_supported;
- pi->vl.high_limit = cpu_to_be16(ibp->vl_high_limit);
+ pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
- pi->clientrereg_subnettimeout = ibp->subnet_timeout;
+ pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
pi->port_link_mode = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
OPA_PORT_LINK_MODE_OPA << 5 |
@@ -701,8 +703,10 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
- /* this counter is 16 bits wide, but the replay_depth.wire
- * variable is only 8 bits */
+ /*
+ * this counter is 16 bits wide, but the replay_depth.wire
+ * variable is only 8 bits
+ */
if (tmp > 0xff)
tmp = 0xff;
pi->replay_depth.wire = tmp;
@@ -749,7 +753,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- n_blocks_avail = (u16) (npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+ n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
@@ -763,7 +767,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- p = (__be16 *) data;
+ p = (__be16 *)data;
q = (u16 *)data;
/* get the real pkeys if we are requesting the first block */
if (start_block == 0) {
@@ -772,9 +776,9 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
p[i] = cpu_to_be16(q[i]);
if (resp_len)
*resp_len += size;
- } else
+ } else {
smp->status |= IB_SMP_INVALID_FIELD;
-
+ }
return reply((struct ib_mad_hdr *)smp);
}
@@ -901,8 +905,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
u32 logical_old = driver_logical_state(ppd);
int ret, logical_allowed, physical_allowed;
- logical_allowed = ret =
- logical_transition_allowed(logical_old, logical_new);
+ ret = logical_transition_allowed(logical_old, logical_new);
+ logical_allowed = ret;
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
@@ -912,8 +916,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
return ret;
}
- physical_allowed = ret =
- physical_transition_allowed(physical_old, physical_new);
+ ret = physical_transition_allowed(physical_old, physical_new);
+ physical_allowed = ret;
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
@@ -928,6 +932,14 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
return HFI_TRANSITION_IGNORED;
/*
+ * A change request of Physical Port State from
+ * 'Offline' to 'Polling' should be ignored.
+ */
+ if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
+ (physical_new == IB_PORTPHYSSTATE_POLLING))
+ return HFI_TRANSITION_IGNORED;
+
+ /*
* Either physical_allowed or logical_allowed is
* HFI_TRANSITION_ALLOWED.
*/
@@ -972,16 +984,15 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
break;
/* FALLTHROUGH */
case IB_PORT_DOWN:
- if (phys_state == IB_PORTPHYSSTATE_NOP)
+ if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
- else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
+ } else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
link_state = HLS_DN_POLL;
- set_link_down_reason(ppd,
- OPA_LINKDOWN_REASON_FM_BOUNCE, 0,
- OPA_LINKDOWN_REASON_FM_BOUNCE);
- } else if (phys_state == IB_PORTPHYSSTATE_DISABLED)
+ set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
+ 0, OPA_LINKDOWN_REASON_FM_BOUNCE);
+ } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
link_state = HLS_DN_DISABLE;
- else {
+ } else {
pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
phys_state);
smp->status |= IB_SMP_INVALID_FIELD;
@@ -991,11 +1002,11 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
set_link_state(ppd, link_state);
if (link_state == HLS_DN_DISABLE &&
(ppd->offline_disabled_reason >
- OPA_LINKDOWN_REASON_SMA_DISABLED ||
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
ppd->offline_disabled_reason ==
- OPA_LINKDOWN_REASON_NONE))
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
ppd->offline_disabled_reason =
- OPA_LINKDOWN_REASON_SMA_DISABLED;
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
/*
* Don't send a reply if the response would be sent
* through the disabled port.
@@ -1091,13 +1102,13 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ls_old = driver_lstate(ppd);
- ibp->mkey = pi->mkey;
- ibp->gid_prefix = pi->subnet_prefix;
- ibp->mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
+ ibp->rvp.mkey = pi->mkey;
+ ibp->rvp.gid_prefix = pi->subnet_prefix;
+ ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
/* Must be a valid unicast LID address. */
if ((lid == 0 && ls_old > IB_PORT_INIT) ||
- lid >= HFI1_MULTICAST_LID_BASE) {
+ lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
smp->status |= IB_SMP_INVALID_FIELD;
pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
lid);
@@ -1130,23 +1141,23 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
- smlid >= HFI1_MULTICAST_LID_BASE) {
+ smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
smp->status |= IB_SMP_INVALID_FIELD;
pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
- } else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+ } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
- spin_lock_irqsave(&ibp->lock, flags);
- if (ibp->sm_ah) {
- if (smlid != ibp->sm_lid)
- ibp->sm_ah->attr.dlid = smlid;
- if (msl != ibp->sm_sl)
- ibp->sm_ah->attr.sl = msl;
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ if (ibp->rvp.sm_ah) {
+ if (smlid != ibp->rvp.sm_lid)
+ ibp->rvp.sm_ah->attr.dlid = smlid;
+ if (msl != ibp->rvp.sm_sl)
+ ibp->rvp.sm_ah->attr.sl = msl;
}
- spin_unlock_irqrestore(&ibp->lock, flags);
- if (smlid != ibp->sm_lid)
- ibp->sm_lid = smlid;
- if (msl != ibp->sm_sl)
- ibp->sm_sl = msl;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ if (smlid != ibp->rvp.sm_lid)
+ ibp->rvp.sm_lid = smlid;
+ if (msl != ibp->rvp.sm_sl)
+ ibp->rvp.sm_sl = msl;
event.event = IB_EVENT_SM_CHANGE;
ib_dispatch_event(&event);
}
@@ -1167,8 +1178,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->port_error_action = be32_to_cpu(pi->port_error_action);
lwe = be16_to_cpu(pi->link_width.enabled);
if (lwe) {
- if (lwe == OPA_LINK_WIDTH_RESET
- || lwe == OPA_LINK_WIDTH_RESET_OLD)
+ if (lwe == OPA_LINK_WIDTH_RESET ||
+ lwe == OPA_LINK_WIDTH_RESET_OLD)
set_link_width_enabled(ppd, ppd->link_width_supported);
else if ((lwe & ~ppd->link_width_supported) == 0)
set_link_width_enabled(ppd, lwe);
@@ -1177,19 +1188,21 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
}
lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
/* LWD.E is always applied - 0 means "disabled" */
- if (lwe == OPA_LINK_WIDTH_RESET
- || lwe == OPA_LINK_WIDTH_RESET_OLD) {
+ if (lwe == OPA_LINK_WIDTH_RESET ||
+ lwe == OPA_LINK_WIDTH_RESET_OLD) {
set_link_width_downgrade_enabled(ppd,
- ppd->link_width_downgrade_supported);
+ ppd->
+ link_width_downgrade_supported
+ );
} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
/* only set and apply if something changed */
if (lwe != ppd->link_width_downgrade_enabled) {
set_link_width_downgrade_enabled(ppd, lwe);
call_link_downgrade_policy = 1;
}
- } else
+ } else {
smp->status |= IB_SMP_INVALID_FIELD;
-
+ }
lse = be16_to_cpu(pi->link_speed.enabled);
if (lse) {
if (lse & be16_to_cpu(pi->link_speed.supported))
@@ -1198,22 +1211,24 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
smp->status |= IB_SMP_INVALID_FIELD;
}
- ibp->mkeyprot = (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
- ibp->vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
+ ibp->rvp.mkeyprot =
+ (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
+ ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
- ibp->vl_high_limit);
+ ibp->rvp.vl_high_limit);
- if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
- ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+ if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+ ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
for (i = 0; i < ppd->vls_supported; i++) {
if ((i % 2) == 0)
- mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i/2] >> 4)
- & 0xF);
+ mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
+ 4) & 0xF);
else
- mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i/2] & 0xF);
+ mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
+ 0xF);
if (mtu == 0xffff) {
pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
mtu,
@@ -1223,8 +1238,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
}
if (dd->vld[i].mtu != mtu) {
dd_dev_info(dd,
- "MTU change on vl %d from %d to %d\n",
- i, dd->vld[i].mtu, mtu);
+ "MTU change on vl %d from %d to %d\n",
+ i, dd->vld[i].mtu, mtu);
dd->vld[i].mtu = mtu;
call_set_mtu++;
}
@@ -1232,13 +1247,13 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
/* As per OPAV1 spec: VL15 must support and be configured
* for operation with a 2048 or larger MTU.
*/
- mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15/2] & 0xF);
+ mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
if (mtu < 2048 || mtu == 0xffff)
mtu = 2048;
if (dd->vld[15].mtu != mtu) {
dd_dev_info(dd,
- "MTU change on vl 15 from %d to %d\n",
- dd->vld[15].mtu, mtu);
+ "MTU change on vl 15 from %d to %d\n",
+ dd->vld[15].mtu, mtu);
dd->vld[15].mtu = mtu;
call_set_mtu++;
}
@@ -1254,21 +1269,21 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
smp->status |= IB_SMP_INVALID_FIELD;
} else {
if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
- vls) == -EINVAL)
+ vls) == -EINVAL)
smp->status |= IB_SMP_INVALID_FIELD;
}
}
if (pi->mkey_violations == 0)
- ibp->mkey_violations = 0;
+ ibp->rvp.mkey_violations = 0;
if (pi->pkey_violations == 0)
- ibp->pkey_violations = 0;
+ ibp->rvp.pkey_violations = 0;
if (pi->qkey_violations == 0)
- ibp->qkey_violations = 0;
+ ibp->rvp.qkey_violations = 0;
- ibp->subnet_timeout =
+ ibp->rvp.subnet_timeout =
pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
@@ -1388,7 +1403,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
event.event = IB_EVENT_PKEY_CHANGE;
- event.device = &dd->verbs_dev.ibdev;
+ event.device = &dd->verbs_dev.rdi.ibdev;
event.element.port_num = port;
ib_dispatch_event(&event);
}
@@ -1402,7 +1417,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_sent = OPA_AM_NBLK(am);
u32 start_block = am & 0x7ff;
- u16 *p = (u16 *) data;
+ u16 *p = (u16 *)data;
__be16 *q = (__be16 *)data;
int i;
u16 n_blocks_avail;
@@ -1415,7 +1430,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- n_blocks_avail = (u16)(npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+ n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
if (start_block + n_blocks_sent > n_blocks_avail ||
n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
@@ -1514,14 +1529,22 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
int i;
+ u8 sc;
if (am) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
- for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
- ibp->sl_to_sc[i] = *p++;
+ for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++) {
+ sc = *p++;
+ if (ibp->sl_to_sc[i] != sc) {
+ ibp->sl_to_sc[i] = sc;
+
+ /* Put all stale qps into error state */
+ hfi1_error_port_qps(ibp, i);
+ }
+ }
return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
}
@@ -1574,7 +1597,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
{
u32 n_blocks = OPA_AM_NBLK(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- void *vp = (void *) data;
+ void *vp = (void *)data;
size_t size = 4 * sizeof(u64);
if (n_blocks != 1) {
@@ -1597,7 +1620,7 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
u32 n_blocks = OPA_AM_NBLK(am);
int async_update = OPA_AM_ASYNC(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- void *vp = (void *) data;
+ void *vp = (void *)data;
struct hfi1_pportdata *ppd;
int lstate;
@@ -1609,8 +1632,10 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
/* IB numbers ports from 1, hw from 0 */
ppd = dd->pport + (port - 1);
lstate = driver_lstate(ppd);
- /* it's known that async_update is 0 by this point, but include
- * the explicit check for clarity */
+ /*
+ * it's known that async_update is 0 by this point, but include
+ * the explicit check for clarity
+ */
if (!async_update &&
(lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
smp->status |= IB_SMP_INVALID_FIELD;
@@ -1629,7 +1654,7 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
- void *vp = (void *) data;
+ void *vp = (void *)data;
int size;
if (n_blocks != 1) {
@@ -1654,7 +1679,7 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
- void *vp = (void *) data;
+ void *vp = (void *)data;
int lstate;
if (n_blocks != 1) {
@@ -1687,7 +1712,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
u32 lstate;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
- struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+ struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
if (nports != 1) {
smp->status |= IB_SMP_INVALID_FIELD;
@@ -1707,12 +1732,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
psi->port_states.ledenable_offlinereason |=
ppd->is_sm_config_started << 5;
psi->port_states.ledenable_offlinereason |=
- ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+ ppd->offline_disabled_reason;
#else
psi->port_states.offline_reason = ppd->neighbor_normal << 4;
psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
- psi->port_states.offline_reason |= ppd->offline_disabled_reason &
- OPA_PI_MASK_OFFLINE_REASON;
+ psi->port_states.offline_reason |= ppd->offline_disabled_reason;
#endif /* PI_LED_ENABLE_SUP */
psi->port_states.portphysstate_portstate =
@@ -1737,7 +1761,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
u8 ls_new, ps_new;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
- struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+ struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
int ret, invalid = 0;
if (nports != 1) {
@@ -1782,14 +1806,16 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
u32 len = OPA_AM_CI_LEN(am) + 1;
int ret;
-#define __CI_PAGE_SIZE (1 << 7) /* 128 bytes */
+#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
#define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
#define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
- /* check that addr is within spec, and
- * addr and (addr + len - 1) are on the same "page" */
+ /*
+ * check that addr is within spec, and
+ * addr and (addr + len - 1) are on the same "page"
+ */
if (addr >= 4096 ||
- (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
+ (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1823,7 +1849,7 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
- struct buffer_control *p = (struct buffer_control *) data;
+ struct buffer_control *p = (struct buffer_control *)data;
int size;
if (num_ports != 1) {
@@ -1846,7 +1872,7 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
- struct buffer_control *p = (struct buffer_control *) data;
+ struct buffer_control *p = (struct buffer_control *)data;
if (num_ports != 1) {
smp->status |= IB_SMP_INVALID_FIELD;
@@ -1919,13 +1945,15 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
switch (section) {
case OPA_VLARB_LOW_ELEMENTS:
- (void) fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
+ (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
break;
case OPA_VLARB_HIGH_ELEMENTS:
- (void) fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+ (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
break;
- /* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
- * can be changed from the default values */
+ /*
+ * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
+ * can be changed from the default values
+ */
case OPA_VLARB_PREEMPT_ELEMENTS:
/* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
@@ -2137,8 +2165,10 @@ struct opa_port_data_counters_msg {
};
struct opa_port_error_counters64_msg {
- /* Request contains first two fields, response contains the
- * whole magilla */
+ /*
+ * Request contains first two fields, response contains the
+ * whole magilla
+ */
__be64 port_select_mask[4];
__be32 vl_select_mask;
@@ -2172,7 +2202,6 @@ struct opa_port_error_info_msg {
__be32 error_info_select_mask;
__be32 reserved1;
struct _port_ei {
-
u8 port_number;
u8 reserved2[7];
@@ -2251,7 +2280,7 @@ enum error_info_selects {
};
static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u32 *resp_len)
+ struct ib_device *ibdev, u32 *resp_len)
{
struct opa_class_port_info *p =
(struct opa_class_port_info *)pmp->data;
@@ -2299,9 +2328,9 @@ static void a0_portstatus(struct hfi1_pportdata *ppd,
}
}
-
static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
struct opa_port_status_req *req =
(struct opa_port_status_req *)pmp->data;
@@ -2326,8 +2355,8 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- if (nports != 1 || (port_num && port_num != port)
- || num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
+ if (nports != 1 || (port_num && port_num != port) ||
+ num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2357,7 +2386,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
CNTR_INVALID_VL));
rsp->port_multicast_xmit_pkts =
cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
- CNTR_INVALID_VL));
+ CNTR_INVALID_VL));
rsp->port_multicast_rcv_pkts =
cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
CNTR_INVALID_VL));
@@ -2386,7 +2415,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
}
tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
- CNTR_INVALID_VL);
+ CNTR_INVALID_VL);
if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
/* overflow/wrapped */
rsp->link_error_recovery = cpu_to_be32(~0);
@@ -2401,13 +2430,13 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
CNTR_INVALID_VL));
rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
- CNTR_INVALID_VL));
+ CNTR_INVALID_VL));
/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
- vlinfo = &(rsp->vls[0]);
+ vlinfo = &rsp->vls[0];
vfi = 0;
/* The vl_select_mask has been checked above, and we know
* that it contains only entries which represent valid VLs.
@@ -2423,27 +2452,27 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
rsp->vls[vfi].port_vl_rcv_pkts =
cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_xmit_data =
cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_xmit_pkts =
cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_xmit_wait =
cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_becn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
vlinfo++;
vfi++;
@@ -2473,7 +2502,7 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
CNTR_INVALID_VL);
error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
- CNTR_INVALID_VL);
+ CNTR_INVALID_VL);
/* local link integrity must be right-shifted by the lli resolution */
tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
@@ -2483,10 +2512,10 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
error_counter_summary += (tmp >> res_ler);
error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
- CNTR_INVALID_VL);
+ CNTR_INVALID_VL);
error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
- CNTR_INVALID_VL);
+ CNTR_INVALID_VL);
/* ppd->link_downed is a 32-bit value */
error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
CNTR_INVALID_VL);
@@ -2512,7 +2541,7 @@ static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
idx_from_vl(vl));
if (tmp < sum_vl_xmit_wait) {
/* we wrapped */
- sum_vl_xmit_wait = (u64) ~0;
+ sum_vl_xmit_wait = (u64)~0;
break;
}
sum_vl_xmit_wait = tmp;
@@ -2522,8 +2551,30 @@ static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
}
}
+static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
+ struct _port_dctrs *rsp)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
+ CNTR_INVALID_VL));
+ rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
+ CNTR_INVALID_VL));
+ rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
+ CNTR_INVALID_VL));
+ rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
+ CNTR_INVALID_VL));
+ rsp->port_multicast_xmit_pkts =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
+ CNTR_INVALID_VL));
+ rsp->port_multicast_rcv_pkts =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
+ CNTR_INVALID_VL));
+}
+
static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
struct opa_port_data_counters_msg *req =
(struct opa_port_data_counters_msg *)pmp->data;
@@ -2590,39 +2641,19 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
*/
hfi1_read_link_quality(dd, &lq);
rsp->link_quality_indicator = cpu_to_be32((u32)lq);
+ pma_get_opa_port_dctrs(ibdev, rsp);
- /* rsp->sw_port_congestion is 0 for HFIs */
- /* rsp->port_xmit_time_cong is 0 for HFIs */
- /* rsp->port_xmit_wasted_bw ??? */
- /* rsp->port_xmit_wait_data ??? */
- /* rsp->port_mark_fecn is 0 for HFIs */
-
- rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
- CNTR_INVALID_VL));
- rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
- CNTR_INVALID_VL));
- rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
- CNTR_INVALID_VL));
- rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
- CNTR_INVALID_VL));
- rsp->port_multicast_xmit_pkts =
- cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
- CNTR_INVALID_VL));
- rsp->port_multicast_rcv_pkts =
- cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
- CNTR_INVALID_VL));
rsp->port_xmit_wait =
cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
rsp->port_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
rsp->port_rcv_becn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
-
rsp->port_error_counter_summary =
cpu_to_be64(get_error_counter_summary(ibdev, port,
res_lli, res_ler));
- vlinfo = &(rsp->vls[0]);
+ vlinfo = &rsp->vls[0];
vfi = 0;
/* The vl_select_mask has been checked above, and we know
* that it contains only entries which represent valid VLs.
@@ -2630,44 +2661,45 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
* any additional checks for vl.
*/
for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ 8 * sizeof(req->vl_select_mask)) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_data =
cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_data =
cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_xmit_pkts =
cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_pkts =
cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_xmit_wait =
cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_becn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
- idx_from_vl(vl)));
+ idx_from_vl(vl)));
/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
/* rsp->port_vl_xmit_wasted_bw ??? */
/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
- * does this differ from rsp->vls[vfi].port_vl_xmit_wait */
+ * does this differ from rsp->vls[vfi].port_vl_xmit_wait
+ */
/*rsp->vls[vfi].port_vl_mark_fecn =
- cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
- + offset));
- */
+ * cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
+ * + offset));
+ */
vlinfo++;
vfi++;
}
@@ -2680,12 +2712,88 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
+static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
+ pmp->data;
+ struct _port_dctrs rsp;
+
+ if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+ goto bail;
+ }
+
+ memset(&rsp, 0, sizeof(rsp));
+ pma_get_opa_port_dctrs(ibdev, &rsp);
+
+ p->port_xmit_data = rsp.port_xmit_data;
+ p->port_rcv_data = rsp.port_rcv_data;
+ p->port_xmit_packets = rsp.port_xmit_pkts;
+ p->port_rcv_packets = rsp.port_rcv_pkts;
+ p->port_unicast_xmit_packets = 0;
+ p->port_unicast_rcv_packets = 0;
+ p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
+ p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
+
+bail:
+ return reply((struct ib_mad_hdr *)pmp);
+}
+
+static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
+ struct _port_ectrs *rsp, u8 port)
+{
+ u64 tmp, tmp2;
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_ibport *ibp = to_iport(ibdev, port);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
+ tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
+ CNTR_INVALID_VL);
+ if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
+ /* overflow/wrapped */
+ rsp->link_error_recovery = cpu_to_be32(~0);
+ } else {
+ rsp->link_error_recovery = cpu_to_be32(tmp2);
+ }
+
+ rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
+ CNTR_INVALID_VL));
+ rsp->port_rcv_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
+ rsp->port_rcv_remote_physical_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
+ CNTR_INVALID_VL));
+ rsp->port_rcv_switch_relay_errors = 0;
+ rsp->port_xmit_discards =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
+ CNTR_INVALID_VL));
+ rsp->port_xmit_constraint_errors =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
+ CNTR_INVALID_VL));
+ rsp->port_rcv_constraint_errors =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
+ CNTR_INVALID_VL));
+ tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
+ tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
+ if (tmp2 < tmp) {
+ /* overflow/wrapped */
+ rsp->local_link_integrity_errors = cpu_to_be64(~0);
+ } else {
+ rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
+ }
+ rsp->excessive_buffer_overruns =
+ cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+}
+
static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
size_t response_data_size;
struct _port_ectrs *rsp;
- unsigned long port_num;
+ u8 port_num;
struct opa_port_error_counters64_msg *req;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 num_ports;
@@ -2695,7 +2803,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
struct hfi1_pportdata *ppd;
struct _vls_ectrs *vlinfo;
unsigned long vl;
- u64 port_mask, tmp, tmp2;
+ u64 port_mask, tmp;
u32 vl_select_mask;
int vfi;
@@ -2724,9 +2832,9 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask));
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2737,46 +2845,18 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
ppd = ppd_from_ibp(ibp);
memset(rsp, 0, sizeof(*rsp));
- rsp->port_number = (u8)port_num;
+ rsp->port_number = port_num;
+
+ pma_get_opa_port_ectrs(ibdev, rsp, port_num);
- rsp->port_rcv_constraint_errors =
- cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
- CNTR_INVALID_VL));
- /* port_rcv_switch_relay_errors is 0 for HFIs */
- rsp->port_xmit_discards =
- cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
- CNTR_INVALID_VL));
rsp->port_rcv_remote_physical_errors =
cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
- CNTR_INVALID_VL));
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- if (tmp2 < tmp) {
- /* overflow/wrapped */
- rsp->local_link_integrity_errors = cpu_to_be64(~0);
- } else {
- rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
- }
- tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
- tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
- CNTR_INVALID_VL);
- if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
- /* overflow/wrapped */
- rsp->link_error_recovery = cpu_to_be32(~0);
- } else {
- rsp->link_error_recovery = cpu_to_be32(tmp2);
- }
- rsp->port_xmit_constraint_errors =
- cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
- CNTR_INVALID_VL));
- rsp->excessive_buffer_overruns =
- cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+ CNTR_INVALID_VL));
rsp->fm_config_errors =
cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
- CNTR_INVALID_VL));
- rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
- CNTR_INVALID_VL));
+ CNTR_INVALID_VL));
tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
+
rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
vlinfo = &rsp->vls[0];
@@ -2796,8 +2876,94 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
+static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct _port_ectrs rsp;
+ u64 temp_link_overrun_errors;
+ u64 temp_64;
+ u32 temp_32;
+
+ memset(&rsp, 0, sizeof(rsp));
+ pma_get_opa_port_ectrs(ibdev, &rsp, port);
+
+ if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+ goto bail;
+ }
+
+ p->symbol_error_counter = 0; /* N/A for OPA */
+
+ temp_32 = be32_to_cpu(rsp.link_error_recovery);
+ if (temp_32 > 0xFFUL)
+ p->link_error_recovery_counter = 0xFF;
+ else
+ p->link_error_recovery_counter = (u8)temp_32;
+
+ temp_32 = be32_to_cpu(rsp.link_downed);
+ if (temp_32 > 0xFFUL)
+ p->link_downed_counter = 0xFF;
+ else
+ p->link_downed_counter = (u8)temp_32;
+
+ temp_64 = be64_to_cpu(rsp.port_rcv_errors);
+ if (temp_64 > 0xFFFFUL)
+ p->port_rcv_errors = cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_errors = cpu_to_be16((u16)temp_64);
+
+ temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
+ if (temp_64 > 0xFFFFUL)
+ p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
+
+ temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
+ p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
+
+ temp_64 = be64_to_cpu(rsp.port_xmit_discards);
+ if (temp_64 > 0xFFFFUL)
+ p->port_xmit_discards = cpu_to_be16(0xFFFF);
+ else
+ p->port_xmit_discards = cpu_to_be16((u16)temp_64);
+
+ temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
+ if (temp_64 > 0xFFUL)
+ p->port_xmit_constraint_errors = 0xFF;
+ else
+ p->port_xmit_constraint_errors = (u8)temp_64;
+
+ temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
+ if (temp_64 > 0xFFUL)
+ p->port_rcv_constraint_errors = 0xFFUL;
+ else
+ p->port_rcv_constraint_errors = (u8)temp_64;
+
+ /* LocalLink: 7:4, BufferOverrun: 3:0 */
+ temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
+ if (temp_64 > 0xFUL)
+ temp_64 = 0xFUL;
+
+ temp_link_overrun_errors = temp_64 << 4;
+
+ temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
+ if (temp_64 > 0xFUL)
+ temp_64 = 0xFUL;
+ temp_link_overrun_errors |= temp_64;
+
+ p->link_overrun_errors = (u8)temp_link_overrun_errors;
+
+ p->vl15_dropped = 0; /* N/A for OPA */
+
+bail:
+ return reply((struct ib_mad_hdr *)pmp);
+}
+
static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
size_t response_data_size;
struct _port_ei *rsp;
@@ -2805,7 +2971,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u64 port_mask;
u32 num_ports;
- unsigned long port_num;
+ u8 port_num;
u8 num_pslm;
u64 reg;
@@ -2838,7 +3004,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
port_num = find_first_bit((unsigned long *)&port_mask,
sizeof(port_mask));
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2847,15 +3013,17 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
rsp->port_rcv_ei.status_and_code =
dd->err_info_rcvport.status_and_code;
memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
- &dd->err_info_rcvport.packet_flit1, sizeof(u64));
+ &dd->err_info_rcvport.packet_flit1, sizeof(u64));
memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
- &dd->err_info_rcvport.packet_flit2, sizeof(u64));
+ &dd->err_info_rcvport.packet_flit2, sizeof(u64));
/* ExcessiverBufferOverrunInfo */
reg = read_csr(dd, RCV_ERR_INFO);
if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
- /* if the RcvExcessBufferOverrun bit is set, save SC of
- * first pkt that encountered an excess buffer overrun */
+ /*
+ * if the RcvExcessBufferOverrun bit is set, save SC of
+ * first pkt that encountered an excess buffer overrun
+ */
u8 tmp = (u8)reg;
tmp &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
@@ -2892,7 +3060,8 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
}
static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
struct opa_clear_port_status *req =
(struct opa_clear_port_status *)pmp->data;
@@ -2951,8 +3120,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
/* Only applicable for switch */
- /*if (counter_select & CS_PORT_MARK_FECN)
- write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);*/
+ /* if (counter_select & CS_PORT_MARK_FECN)
+ * write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
+ */
if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
@@ -2975,7 +3145,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_LINK_ERROR_RECOVERY) {
write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
- CNTR_INVALID_VL, 0);
+ CNTR_INVALID_VL, 0);
}
if (counter_select & CS_PORT_RCV_ERRORS)
@@ -2997,7 +3167,6 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
8 * sizeof(vl_select_mask)) {
-
if (counter_select & CS_PORT_XMIT_DATA)
write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
@@ -3026,9 +3195,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_PORT_RCV_BUBBLE)
write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
- /*if (counter_select & CS_PORT_MARK_FECN)
- write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
- */
+ /* if (counter_select & CS_PORT_MARK_FECN)
+ * write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
+ */
/* port_vl_xmit_discards ??? */
}
@@ -3039,14 +3208,15 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
}
static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
struct _port_ei *rsp;
struct opa_port_error_info_msg *req;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u64 port_mask;
u32 num_ports;
- unsigned long port_num;
+ u8 port_num;
u8 num_pslm;
u32 error_info_select;
@@ -3071,7 +3241,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
port_num = find_first_bit((unsigned long *)&port_mask,
sizeof(port_mask));
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -3085,8 +3255,10 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
/* ExcessiverBufferOverrunInfo */
if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
- /* status bit is essentially kept in the h/w - bit 5 of
- * RCV_ERR_INFO */
+ /*
+ * status bit is essentially kept in the h/w - bit 5 of
+ * RCV_ERR_INFO
+ */
write_csr(dd, RCV_ERR_INFO,
RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
@@ -3138,13 +3310,12 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
- u8 *data,
- struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 *data, struct ib_device *ibdev,
+ u8 port, u32 *resp_len)
{
int i;
struct opa_congestion_setting_attr *p =
- (struct opa_congestion_setting_attr *) data;
+ (struct opa_congestion_setting_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct opa_congestion_setting_entry_shadow *entries;
@@ -3154,7 +3325,7 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
cc_state = get_cc_state(ppd);
- if (cc_state == NULL) {
+ if (!cc_state) {
rcu_read_unlock();
return reply((struct ib_mad_hdr *)smp);
}
@@ -3183,7 +3354,7 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
u32 *resp_len)
{
struct opa_congestion_setting_attr *p =
- (struct opa_congestion_setting_attr *) data;
+ (struct opa_congestion_setting_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct opa_congestion_setting_entry_shadow *entries;
@@ -3245,7 +3416,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
continue;
memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
memcpy(cong_log->events[i].remote_qp_number_cn_entry,
- &cce->rqpn, 3);
+ &cce->rqpn, 3);
cong_log->events[i].sl_svc_type_cn_entry =
((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
cong_log->events[i].remote_lid_cn_entry =
@@ -3275,7 +3446,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
u32 *resp_len)
{
struct ib_cc_table_attr *cc_table_attr =
- (struct ib_cc_table_attr *) data;
+ (struct ib_cc_table_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 start_block = OPA_AM_START_BLK(am);
@@ -3296,7 +3467,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
cc_state = get_cc_state(ppd);
- if (cc_state == NULL) {
+ if (!cc_state) {
rcu_read_unlock();
return reply((struct ib_mad_hdr *)smp);
}
@@ -3316,7 +3487,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
rcu_read_unlock();
if (resp_len)
- *resp_len += sizeof(u16)*(IB_CCT_ENTRIES * n_blocks + 1);
+ *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
return reply((struct ib_mad_hdr *)smp);
}
@@ -3332,7 +3503,7 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
{
- struct ib_cc_table_attr *p = (struct ib_cc_table_attr *) data;
+ struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 start_block = OPA_AM_START_BLK(am);
@@ -3362,14 +3533,14 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
}
new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
- if (new_cc_state == NULL)
+ if (!new_cc_state)
goto getit;
spin_lock(&ppd->cc_state_lock);
old_cc_state = get_cc_state(ppd);
- if (old_cc_state == NULL) {
+ if (!old_cc_state) {
spin_unlock(&ppd->cc_state_lock);
kfree(new_cc_state);
return reply((struct ib_mad_hdr *)smp);
@@ -3409,26 +3580,31 @@ struct opa_led_info {
};
#define OPA_LED_SHIFT 31
-#define OPA_LED_MASK (1 << OPA_LED_SHIFT)
+#define OPA_LED_MASK BIT(OPA_LED_SHIFT)
static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- struct opa_led_info *p = (struct opa_led_info *) data;
+ struct hfi1_pportdata *ppd = dd->pport;
+ struct opa_led_info *p = (struct opa_led_info *)data;
u32 nport = OPA_AM_NPORT(am);
- u64 reg;
+ u32 is_beaconing_active;
if (nport != 1) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
- reg = read_csr(dd, DCC_CFG_LED_CNTRL);
- if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) &&
- ((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf))
- p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK);
+ /*
+ * This pairs with the memory barrier in hfi1_start_led_override to
+ * ensure that we read the correct state of LED beaconing represented
+ * by led_override_timer_active
+ */
+ smp_rmb();
+ is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+ p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
if (resp_len)
*resp_len += sizeof(struct opa_led_info);
@@ -3441,7 +3617,7 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
u32 *resp_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- struct opa_led_info *p = (struct opa_led_info *) data;
+ struct opa_led_info *p = (struct opa_led_info *)data;
u32 nport = OPA_AM_NPORT(am);
int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
@@ -3450,7 +3626,10 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- setextled(dd, on);
+ if (on)
+ hfi1_start_led_override(dd->pport, 2000, 1500);
+ else
+ shutdown_led_override(dd->pport);
return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
}
@@ -3493,7 +3672,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
@@ -3532,9 +3711,9 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
resp_len);
break;
case IB_SMP_ATTR_SM_INFO:
- if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
- if (ibp->port_cap_flags & IB_PORT_SM)
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
/* FALLTHROUGH */
default:
@@ -3575,7 +3754,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
@@ -3602,9 +3781,9 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
resp_len);
break;
case IB_SMP_ATTR_SM_INFO:
- if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
- if (ibp->port_cap_flags & IB_PORT_SM)
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
/* FALLTHROUGH */
default:
@@ -3654,14 +3833,13 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
/* zero the payload for this segment */
memset(next_smp + sizeof(*agg), 0, agg_data_len);
- (void) subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
+ (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
ibdev, port, NULL);
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
}
next_smp += agg_size;
-
}
return reply((struct ib_mad_hdr *)smp);
@@ -3698,14 +3876,13 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
return reply((struct ib_mad_hdr *)smp);
}
- (void) subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
+ (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
ibdev, port, NULL);
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
}
next_smp += agg_size;
-
}
return reply((struct ib_mad_hdr *)smp);
@@ -3823,7 +4000,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
if (smp->class_version != OPA_SMI_CLASS_VERSION) {
smp->status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_mad_hdr *)smp);
- goto bail;
+ return ret;
}
ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
smp->route.dr.dr_slid, smp->route.dr.return_path,
@@ -3843,13 +4020,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
smp->method == IB_MGMT_METHOD_SET) &&
port_num && port_num <= ibdev->phys_port_cnt &&
port != port_num)
- (void) check_mkey(to_iport(ibdev, port_num),
+ (void)check_mkey(to_iport(ibdev, port_num),
(struct ib_mad_hdr *)smp, 0,
smp->mkey, smp->route.dr.dr_slid,
smp->route.dr.return_path,
smp->hop_cnt);
ret = IB_MAD_RESULT_FAILURE;
- goto bail;
+ return ret;
}
*resp_len = opa_get_smp_header_size(smp);
@@ -3861,23 +4038,25 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
clear_opa_smp_data(smp);
ret = subn_get_opa_sma(attr_id, smp, am, data,
ibdev, port, resp_len);
- goto bail;
+ break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_get_opa_aggregate(smp, ibdev, port,
resp_len);
- goto bail;
+ break;
}
+ break;
case IB_MGMT_METHOD_SET:
switch (attr_id) {
default:
ret = subn_set_opa_sma(attr_id, smp, am, data,
ibdev, port, resp_len);
- goto bail;
+ break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_set_opa_aggregate(smp, ibdev, port,
resp_len);
- goto bail;
+ break;
}
+ break;
case IB_MGMT_METHOD_TRAP:
case IB_MGMT_METHOD_REPORT:
case IB_MGMT_METHOD_REPORT_RESP:
@@ -3888,13 +4067,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
* Just tell the caller to process it normally.
*/
ret = IB_MAD_RESULT_SUCCESS;
- goto bail;
+ break;
default:
smp->status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_mad_hdr *)smp);
+ break;
}
-bail:
return ret;
}
@@ -3910,7 +4089,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
if (smp->class_version != 1) {
smp->status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_mad_hdr *)smp);
- goto bail;
+ return ret;
}
ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
@@ -3931,13 +4110,13 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
smp->method == IB_MGMT_METHOD_SET) &&
port_num && port_num <= ibdev->phys_port_cnt &&
port != port_num)
- (void) check_mkey(to_iport(ibdev, port_num),
- (struct ib_mad_hdr *)smp, 0,
- smp->mkey,
- (__force __be32)smp->dr_slid,
- smp->return_path, smp->hop_cnt);
+ (void)check_mkey(to_iport(ibdev, port_num),
+ (struct ib_mad_hdr *)smp, 0,
+ smp->mkey,
+ (__force __be32)smp->dr_slid,
+ smp->return_path, smp->hop_cnt);
ret = IB_MAD_RESULT_FAILURE;
- goto bail;
+ return ret;
}
switch (smp->method) {
@@ -3945,15 +4124,77 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
switch (smp->attr_id) {
case IB_SMP_ATTR_NODE_INFO:
ret = subn_get_nodeinfo(smp, ibdev, port);
- goto bail;
+ break;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
- goto bail;
+ break;
}
+ break;
+ }
+
+ return ret;
+}
+
+static int process_perf(struct ib_device *ibdev, u8 port,
+ const struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
+ struct ib_class_port_info *cpi = (struct ib_class_port_info *)
+ &pmp->data;
+ int ret = IB_MAD_RESULT_FAILURE;
+
+ *out_mad = *in_mad;
+ if (pmp->mad_hdr.class_version != 1) {
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_mad_hdr *)pmp);
+ return ret;
+ }
+
+ switch (pmp->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ switch (pmp->mad_hdr.attr_id) {
+ case IB_PMA_PORT_COUNTERS:
+ ret = pma_get_ib_portcounters(pmp, ibdev, port);
+ break;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
+ break;
+ case IB_PMA_CLASS_PORT_INFO:
+ cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ ret = reply((struct ib_mad_hdr *)pmp);
+ break;
+ default:
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_mad_hdr *)pmp);
+ break;
+ }
+ break;
+
+ case IB_MGMT_METHOD_SET:
+ if (pmp->mad_hdr.attr_id) {
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_mad_hdr *)pmp);
+ }
+ break;
+
+ case IB_MGMT_METHOD_TRAP:
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ break;
+
+ default:
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_mad_hdr *)pmp);
+ break;
}
-bail:
return ret;
}
@@ -3978,44 +4219,46 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
switch (pmp->mad_hdr.attr_id) {
case IB_PMA_CLASS_PORT_INFO:
ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
- goto bail;
+ break;
case OPA_PM_ATTRIB_ID_PORT_STATUS:
ret = pma_get_opa_portstatus(pmp, ibdev, port,
- resp_len);
- goto bail;
+ resp_len);
+ break;
case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
ret = pma_get_opa_datacounters(pmp, ibdev, port,
- resp_len);
- goto bail;
+ resp_len);
+ break;
case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
ret = pma_get_opa_porterrors(pmp, ibdev, port,
- resp_len);
- goto bail;
+ resp_len);
+ break;
case OPA_PM_ATTRIB_ID_ERROR_INFO:
ret = pma_get_opa_errorinfo(pmp, ibdev, port,
- resp_len);
- goto bail;
+ resp_len);
+ break;
default:
pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)pmp);
- goto bail;
+ break;
}
+ break;
case IB_MGMT_METHOD_SET:
switch (pmp->mad_hdr.attr_id) {
case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
ret = pma_set_opa_portstatus(pmp, ibdev, port,
- resp_len);
- goto bail;
+ resp_len);
+ break;
case OPA_PM_ATTRIB_ID_ERROR_INFO:
ret = pma_set_opa_errorinfo(pmp, ibdev, port,
- resp_len);
- goto bail;
+ resp_len);
+ break;
default:
pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)pmp);
- goto bail;
+ break;
}
+ break;
case IB_MGMT_METHOD_TRAP:
case IB_MGMT_METHOD_GET_RESP:
@@ -4025,14 +4268,14 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
* Just tell the caller to process it normally.
*/
ret = IB_MAD_RESULT_SUCCESS;
- goto bail;
+ break;
default:
pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_mad_hdr *)pmp);
+ break;
}
-bail:
return ret;
}
@@ -4097,12 +4340,15 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
- goto bail;
+ break;
+ case IB_MGMT_CLASS_PERF_MGMT:
+ ret = process_perf(ibdev, port, in_mad, out_mad);
+ break;
default:
ret = IB_MAD_RESULT_SUCCESS;
+ break;
}
-bail:
return ret;
}
@@ -4154,66 +4400,3 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
return IB_MAD_RESULT_FAILURE;
}
-
-static void send_handler(struct ib_mad_agent *agent,
- struct ib_mad_send_wc *mad_send_wc)
-{
- ib_free_send_mad(mad_send_wc->send_buf);
-}
-
-int hfi1_create_agents(struct hfi1_ibdev *dev)
-{
- struct hfi1_devdata *dd = dd_from_dev(dev);
- struct ib_mad_agent *agent;
- struct hfi1_ibport *ibp;
- int p;
- int ret;
-
- for (p = 0; p < dd->num_pports; p++) {
- ibp = &dd->pport[p].ibport_data;
- agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
- NULL, 0, send_handler,
- NULL, NULL, 0);
- if (IS_ERR(agent)) {
- ret = PTR_ERR(agent);
- goto err;
- }
-
- ibp->send_agent = agent;
- }
-
- return 0;
-
-err:
- for (p = 0; p < dd->num_pports; p++) {
- ibp = &dd->pport[p].ibport_data;
- if (ibp->send_agent) {
- agent = ibp->send_agent;
- ibp->send_agent = NULL;
- ib_unregister_mad_agent(agent);
- }
- }
-
- return ret;
-}
-
-void hfi1_free_agents(struct hfi1_ibdev *dev)
-{
- struct hfi1_devdata *dd = dd_from_dev(dev);
- struct ib_mad_agent *agent;
- struct hfi1_ibport *ibp;
- int p;
-
- for (p = 0; p < dd->num_pports; p++) {
- ibp = &dd->pport[p].ibport_data;
- if (ibp->send_agent) {
- agent = ibp->send_agent;
- ibp->send_agent = NULL;
- ib_unregister_mad_agent(agent);
- }
- if (ibp->sm_ah) {
- ib_destroy_ah(&ibp->sm_ah->ibah);
- ibp->sm_ah = NULL;
- }
- }
-}
diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h
index f0317750e2fc..55ee08675333 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/staging/rdma/hfi1/mad.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -51,8 +48,10 @@
#define _HFI1_MAD_H
#include <rdma/ib_pma.h>
-#define USE_PI_LED_ENABLE 1 /* use led enabled bit in struct
- * opa_port_states, if available */
+#define USE_PI_LED_ENABLE 1 /*
+ * use led enabled bit in struct
+ * opa_port_states, if available
+ */
#include <rdma/opa_smi.h>
#include <rdma/opa_port_info.h>
#ifndef PI_LED_ENABLE_SUP
@@ -235,7 +234,6 @@ struct ib_pma_portcounters_cong {
#define IB_CC_SVCTYPE_RD 0x2
#define IB_CC_SVCTYPE_UD 0x3
-
/*
* There should be an equivalent IB #define for the following, but
* I cannot find it.
@@ -267,7 +265,7 @@ struct opa_hfi1_cong_log {
u8 congestion_flags;
__be16 threshold_event_counter;
__be32 current_time_stamp;
- u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+ u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
struct opa_hfi1_cong_log_event events[OPA_CONG_LOG_ELEMS];
} __packed;
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
new file mode 100644
index 000000000000..c7ad0164ea9a
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/list.h>
+#include <linux/mmu_notifier.h>
+#include <linux/interval_tree_generic.h>
+
+#include "mmu_rb.h"
+#include "trace.h"
+
+struct mmu_rb_handler {
+ struct list_head list;
+ struct mmu_notifier mn;
+ struct rb_root *root;
+ spinlock_t lock; /* protect the RB tree */
+ struct mmu_rb_ops *ops;
+};
+
+static LIST_HEAD(mmu_rb_handlers);
+static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
+
+static unsigned long mmu_node_start(struct mmu_rb_node *);
+static unsigned long mmu_node_last(struct mmu_rb_node *);
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+ unsigned long);
+static inline void mmu_notifier_range_start(struct mmu_notifier *,
+ struct mm_struct *,
+ unsigned long, unsigned long);
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+ unsigned long, unsigned long);
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
+ unsigned long, unsigned long);
+
+static struct mmu_notifier_ops mn_opts = {
+ .invalidate_page = mmu_notifier_page,
+ .invalidate_range_start = mmu_notifier_range_start,
+};
+
+INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
+ mmu_node_start, mmu_node_last, static, __mmu_int_rb);
+
+static unsigned long mmu_node_start(struct mmu_rb_node *node)
+{
+ return node->addr & PAGE_MASK;
+}
+
+static unsigned long mmu_node_last(struct mmu_rb_node *node)
+{
+ return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1;
+}
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+{
+ struct mmu_rb_handler *handlr;
+ unsigned long flags;
+
+ if (!ops->invalidate)
+ return -EINVAL;
+
+ handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
+ if (!handlr)
+ return -ENOMEM;
+
+ handlr->root = root;
+ handlr->ops = ops;
+ INIT_HLIST_NODE(&handlr->mn.hlist);
+ spin_lock_init(&handlr->lock);
+ handlr->mn.ops = &mn_opts;
+ spin_lock_irqsave(&mmu_rb_lock, flags);
+ list_add_tail(&handlr->list, &mmu_rb_handlers);
+ spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+ return mmu_notifier_register(&handlr->mn, current->mm);
+}
+
+void hfi1_mmu_rb_unregister(struct rb_root *root)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ unsigned long flags;
+
+ if (!handler)
+ return;
+
+ spin_lock_irqsave(&mmu_rb_lock, flags);
+ list_del(&handler->list);
+ spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+ if (!RB_EMPTY_ROOT(root)) {
+ struct rb_node *node;
+ struct mmu_rb_node *rbnode;
+
+ while ((node = rb_first(root))) {
+ rbnode = rb_entry(node, struct mmu_rb_node, node);
+ rb_erase(node, root);
+ if (handler->ops->remove)
+ handler->ops->remove(root, rbnode, false);
+ }
+ }
+
+ if (current->mm)
+ mmu_notifier_unregister(&handler->mn, current->mm);
+ kfree(handler);
+}
+
+int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *node;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!handler)
+ return -EINVAL;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
+ mnode->len);
+ node = __mmu_rb_search(handler, mnode->addr, mnode->len);
+ if (node) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ __mmu_int_rb_insert(mnode, root);
+
+ if (handler->ops->insert) {
+ ret = handler->ops->insert(root, mnode);
+ if (ret)
+ __mmu_int_rb_remove(mnode, root);
+ }
+unlock:
+ spin_unlock_irqrestore(&handler->lock, flags);
+ return ret;
+}
+
+/* Caller must host handler lock */
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+ unsigned long addr,
+ unsigned long len)
+{
+ struct mmu_rb_node *node = NULL;
+
+ hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
+ if (!handler->ops->filter) {
+ node = __mmu_int_rb_iter_first(handler->root, addr,
+ (addr + len) - 1);
+ } else {
+ for (node = __mmu_int_rb_iter_first(handler->root, addr,
+ (addr + len) - 1);
+ node;
+ node = __mmu_int_rb_iter_next(node, addr,
+ (addr + len) - 1)) {
+ if (handler->ops->filter(node, addr, len))
+ return node;
+ }
+ }
+ return node;
+}
+
+static void __mmu_rb_remove(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *node, bool arg)
+{
+ /* Validity of handler and node pointers has been checked by caller. */
+ hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
+ node->len);
+ __mmu_int_rb_remove(node, handler->root);
+ if (handler->ops->remove)
+ handler->ops->remove(handler->root, node, arg);
+}
+
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
+ unsigned long len)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *node;
+ unsigned long flags;
+
+ if (!handler)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock_irqsave(&handler->lock, flags);
+ node = __mmu_rb_search(handler, addr, len);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ return node;
+}
+
+void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ unsigned long flags;
+
+ if (!handler || !node)
+ return;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ __mmu_rb_remove(handler, node, false);
+ spin_unlock_irqrestore(&handler->lock, flags);
+}
+
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
+{
+ struct mmu_rb_handler *handler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mmu_rb_lock, flags);
+ list_for_each_entry(handler, &mmu_rb_handlers, list) {
+ if (handler->root == root)
+ goto unlock;
+ }
+ handler = NULL;
+unlock:
+ spin_unlock_irqrestore(&mmu_rb_lock, flags);
+ return handler;
+}
+
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+ struct mm_struct *mm, unsigned long addr)
+{
+ mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+}
+
+static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ mmu_notifier_mem_invalidate(mn, start, end);
+}
+
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+ unsigned long start, unsigned long end)
+{
+ struct mmu_rb_handler *handler =
+ container_of(mn, struct mmu_rb_handler, mn);
+ struct rb_root *root = handler->root;
+ struct mmu_rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
+ node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+ hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
+ node->addr, node->len);
+ if (handler->ops->invalidate(root, node))
+ __mmu_rb_remove(handler, node, true);
+ }
+ spin_unlock_irqrestore(&handler->lock, flags);
+}
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
new file mode 100644
index 000000000000..f8523fdb8a18
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MMU_RB_H
+#define _HFI1_MMU_RB_H
+
+#include "hfi.h"
+
+struct mmu_rb_node {
+ unsigned long addr;
+ unsigned long len;
+ unsigned long __last;
+ struct rb_node node;
+};
+
+struct mmu_rb_ops {
+ bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
+ int (*insert)(struct rb_root *, struct mmu_rb_node *);
+ void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+ int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+};
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
+void hfi1_mmu_rb_unregister(struct rb_root *);
+int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
+ unsigned long);
+
+#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
deleted file mode 100644
index 38253212af7a..000000000000
--- a/drivers/staging/rdma/hfi1/mr.c
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "hfi.h"
-
-/* Fast memory region */
-struct hfi1_fmr {
- struct ib_fmr ibfmr;
- struct hfi1_mregion mr; /* must be last */
-};
-
-static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct hfi1_fmr, ibfmr);
-}
-
-static int init_mregion(struct hfi1_mregion *mr, struct ib_pd *pd,
- int count)
-{
- int m, i = 0;
- int rval = 0;
-
- m = DIV_ROUND_UP(count, HFI1_SEGSZ);
- for (; i < m; i++) {
- mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
- if (!mr->map[i])
- goto bail;
- }
- mr->mapsz = m;
- init_completion(&mr->comp);
- /* count returning the ptr to user */
- atomic_set(&mr->refcount, 1);
- mr->pd = pd;
- mr->max_segs = count;
-out:
- return rval;
-bail:
- while (i)
- kfree(mr->map[--i]);
- rval = -ENOMEM;
- goto out;
-}
-
-static void deinit_mregion(struct hfi1_mregion *mr)
-{
- int i = mr->mapsz;
-
- mr->mapsz = 0;
- while (i)
- kfree(mr->map[--i]);
-}
-
-
-/**
- * hfi1_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
- */
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct hfi1_mr *mr = NULL;
- struct ib_mr *ret;
- int rval;
-
- if (to_ipd(pd)->user) {
- ret = ERR_PTR(-EPERM);
- goto bail;
- }
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- rval = init_mregion(&mr->mr, pd, 0);
- if (rval) {
- ret = ERR_PTR(rval);
- goto bail;
- }
-
-
- rval = hfi1_alloc_lkey(&mr->mr, 1);
- if (rval) {
- ret = ERR_PTR(rval);
- goto bail_mregion;
- }
-
- mr->mr.access_flags = acc;
- ret = &mr->ibmr;
-done:
- return ret;
-
-bail_mregion:
- deinit_mregion(&mr->mr);
-bail:
- kfree(mr);
- goto done;
-}
-
-static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
-{
- struct hfi1_mr *mr;
- int rval = -ENOMEM;
- int m;
-
- /* Allocate struct plus pointers to first level page tables. */
- m = DIV_ROUND_UP(count, HFI1_SEGSZ);
- mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
- if (!mr)
- goto bail;
-
- rval = init_mregion(&mr->mr, pd, count);
- if (rval)
- goto bail;
-
- rval = hfi1_alloc_lkey(&mr->mr, 0);
- if (rval)
- goto bail_mregion;
- mr->ibmr.lkey = mr->mr.lkey;
- mr->ibmr.rkey = mr->mr.lkey;
-done:
- return mr;
-
-bail_mregion:
- deinit_mregion(&mr->mr);
-bail:
- kfree(mr);
- mr = ERR_PTR(rval);
- goto done;
-}
-
-/**
- * hfi1_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_udata *udata)
-{
- struct hfi1_mr *mr;
- struct ib_umem *umem;
- struct scatterlist *sg;
- int n, m, entry;
- struct ib_mr *ret;
-
- if (length == 0) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
-
- umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags, 0);
- if (IS_ERR(umem))
- return (void *) umem;
-
- n = umem->nmap;
-
- mr = alloc_mr(n, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- ib_umem_release(umem);
- goto bail;
- }
-
- mr->mr.user_base = start;
- mr->mr.iova = virt_addr;
- mr->mr.length = length;
- mr->mr.offset = ib_umem_offset(umem);
- mr->mr.access_flags = mr_access_flags;
- mr->umem = umem;
-
- if (is_power_of_2(umem->page_size))
- mr->mr.page_shift = ilog2(umem->page_size);
- m = 0;
- n = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- void *vaddr;
-
- vaddr = page_address(sg_page(sg));
- if (!vaddr) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- n++;
- if (n == HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
- * hfi1_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by hfi1_get_dma_mr()
- * or hfi1_reg_user_mr().
- */
-int hfi1_dereg_mr(struct ib_mr *ibmr)
-{
- struct hfi1_mr *mr = to_imr(ibmr);
- int ret = 0;
- unsigned long timeout;
-
- hfi1_free_lkey(&mr->mr);
-
- hfi1_put_mr(&mr->mr); /* will set completion if last */
- timeout = wait_for_completion_timeout(&mr->mr.comp,
- 5 * HZ);
- if (!timeout) {
- dd_dev_err(
- dd_from_ibdev(mr->mr.pd->device),
- "hfi1_dereg_mr timeout mr %p pd %p refcount %u\n",
- mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
- hfi1_get_mr(&mr->mr);
- ret = -EBUSY;
- goto out;
- }
- deinit_mregion(&mr->mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
- kfree(mr);
-out:
- return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- * FIXME: IB_WR_REG_MR is not supported
- */
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
-{
- struct hfi1_mr *mr;
-
- if (mr_type != IB_MR_TYPE_MEM_REG)
- return ERR_PTR(-EINVAL);
-
- mr = alloc_mr(max_num_sg, pd);
- if (IS_ERR(mr))
- return (struct ib_mr *)mr;
-
- return &mr->ibmr;
-}
-
-/**
- * hfi1_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
-{
- struct hfi1_fmr *fmr;
- int m;
- struct ib_fmr *ret;
- int rval = -ENOMEM;
-
- /* Allocate struct plus pointers to first level page tables. */
- m = DIV_ROUND_UP(fmr_attr->max_pages, HFI1_SEGSZ);
- fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
- if (!fmr)
- goto bail;
-
- rval = init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
- if (rval)
- goto bail;
-
- /*
- * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
- * rkey.
- */
- rval = hfi1_alloc_lkey(&fmr->mr, 0);
- if (rval)
- goto bail_mregion;
- fmr->ibfmr.rkey = fmr->mr.lkey;
- fmr->ibfmr.lkey = fmr->mr.lkey;
- /*
- * Resources are allocated but no valid mapping (RKEY can't be
- * used).
- */
- fmr->mr.access_flags = mr_access_flags;
- fmr->mr.max_segs = fmr_attr->max_pages;
- fmr->mr.page_shift = fmr_attr->page_shift;
-
- ret = &fmr->ibfmr;
-done:
- return ret;
-
-bail_mregion:
- deinit_mregion(&fmr->mr);
-bail:
- kfree(fmr);
- ret = ERR_PTR(rval);
- goto done;
-}
-
-/**
- * hfi1_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova)
-{
- struct hfi1_fmr *fmr = to_ifmr(ibfmr);
- struct hfi1_lkey_table *rkt;
- unsigned long flags;
- int m, n, i;
- u32 ps;
- int ret;
-
- i = atomic_read(&fmr->mr.refcount);
- if (i > 2)
- return -EBUSY;
-
- if (list_len > fmr->mr.max_segs) {
- ret = -EINVAL;
- goto bail;
- }
- rkt = &to_idev(ibfmr->device)->lk_table;
- spin_lock_irqsave(&rkt->lock, flags);
- fmr->mr.user_base = iova;
- fmr->mr.iova = iova;
- ps = 1 << fmr->mr.page_shift;
- fmr->mr.length = list_len * ps;
- m = 0;
- n = 0;
- for (i = 0; i < list_len; i++) {
- fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
- fmr->mr.map[m]->segs[n].length = ps;
- if (++n == HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * hfi1_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int hfi1_unmap_fmr(struct list_head *fmr_list)
-{
- struct hfi1_fmr *fmr;
- struct hfi1_lkey_table *rkt;
- unsigned long flags;
-
- list_for_each_entry(fmr, fmr_list, ibfmr.list) {
- rkt = &to_idev(fmr->ibfmr.device)->lk_table;
- spin_lock_irqsave(&rkt->lock, flags);
- fmr->mr.user_base = 0;
- fmr->mr.iova = 0;
- fmr->mr.length = 0;
- spin_unlock_irqrestore(&rkt->lock, flags);
- }
- return 0;
-}
-
-/**
- * hfi1_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr)
-{
- struct hfi1_fmr *fmr = to_ifmr(ibfmr);
- int ret = 0;
- unsigned long timeout;
-
- hfi1_free_lkey(&fmr->mr);
- hfi1_put_mr(&fmr->mr); /* will set completion if last */
- timeout = wait_for_completion_timeout(&fmr->mr.comp,
- 5 * HZ);
- if (!timeout) {
- hfi1_get_mr(&fmr->mr);
- ret = -EBUSY;
- goto out;
- }
- deinit_mregion(&fmr->mr);
- kfree(fmr);
-out:
- return ret;
-}
diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/staging/rdma/hfi1/opa_compat.h
index f64eec1c2951..6ef3c1cbdcd7 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/staging/rdma/hfi1/opa_compat.h
@@ -1,14 +1,13 @@
#ifndef _LINUX_H
#define _LINUX_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -111,19 +108,4 @@ enum opa_port_phys_state {
/* values 12-15 are reserved/ignored */
};
-/* OPA_PORT_TYPE_* definitions - these belong in opa_port_info.h */
-#define OPA_PORT_TYPE_UNKNOWN 0
-#define OPA_PORT_TYPE_DISCONNECTED 1
-/* port is not currently usable, CableInfo not available */
-#define OPA_PORT_TYPE_FIXED 2
-/* A fixed backplane port in a director class switch. All OPA ASICS */
-#define OPA_PORT_TYPE_VARIABLE 3
-/* A backplane port in a blade system, possibly mixed configuration */
-#define OPA_PORT_TYPE_STANDARD 4
-/* implies a SFF-8636 defined format for CableInfo (QSFP) */
-#define OPA_PORT_TYPE_SI_PHOTONICS 5
-/* A silicon photonics module implies TBD defined format for CableInfo
- * as defined by Intel SFO group */
-/* 6 - 15 are reserved */
-
#endif /* _LINUX_H */
diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c
index 47ca6314e328..0bac21e6a658 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/staging/rdma/hfi1/pcie.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -57,6 +54,7 @@
#include "hfi.h"
#include "chip_registers.h"
+#include "aspm.h"
/* link speed vector for Gen3 speed - not in Linux headers */
#define GEN1_SPEED_VECTOR 0x1
@@ -122,8 +120,9 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
goto bail;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- } else
+ } else {
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ }
if (ret) {
hfi1_early_err(&pdev->dev,
"Unable to set DMA consistent mask: %d\n", ret);
@@ -131,13 +130,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
}
pci_set_master(pdev);
- ret = pci_enable_pcie_error_reporting(pdev);
- if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to enable pcie error reporting: %d\n",
- ret);
- ret = 0;
- }
+ (void)pci_enable_pcie_error_reporting(pdev);
goto done;
bail:
@@ -222,10 +215,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
- &dd->pcie_devctl2);
+ &dd->pcie_devctl2);
pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
- pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
- &dd->pci_lnkctl3);
+ pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
return 0;
@@ -238,7 +230,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
*/
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{
- u64 __iomem *base = (void __iomem *) dd->kregbase;
+ u64 __iomem *base = (void __iomem *)dd->kregbase;
dd->flags &= ~HFI1_PRESENT;
dd->kregbase = NULL;
@@ -274,7 +266,7 @@ void hfi1_pcie_flr(struct hfi1_devdata *dd)
clear:
pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
- PCI_EXP_DEVCTL_BCR_FLR);
+ PCI_EXP_DEVCTL_BCR_FLR);
/* PCIe spec requires the function to be back within 100ms */
msleep(100);
}
@@ -287,9 +279,11 @@ static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
struct msix_entry *msix_entry;
int i;
- /* We can't pass hfi1_msix_entry array to msix_setup
+ /*
+ * We can't pass hfi1_msix_entry array to msix_setup
* so use a dummy msix_entry array and copy the allocated
- * irq back to the hfi1_msix_entry array. */
+ * irq back to the hfi1_msix_entry array.
+ */
msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
if (!msix_entry) {
ret = -ENOMEM;
@@ -319,7 +313,6 @@ do_intx:
nvec, ret);
*msixcnt = 0;
hfi1_enable_intx(dd->pcidev);
-
}
/* return the PCIe link speed from the given link status */
@@ -367,6 +360,7 @@ static void update_lbus_info(struct hfi1_devdata *dd)
int pcie_speeds(struct hfi1_devdata *dd)
{
u32 linkcap;
+ struct pci_dev *parent = dd->pcidev->bus->self;
if (!pci_is_pcie(dd->pcidev)) {
dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@ -379,15 +373,15 @@ int pcie_speeds(struct hfi1_devdata *dd)
pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
dd_dev_info(dd,
- "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
- linkcap & PCI_EXP_LNKCAP_SLS);
+ "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
+ linkcap & PCI_EXP_LNKCAP_SLS);
dd->link_gen3_capable = 0;
}
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
@@ -395,9 +389,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
/* obtain the link width and current speed */
update_lbus_info(dd);
- /* check against expected pcie width and complain if "wrong" */
- if (dd->lbus_width < 16)
- dd_dev_err(dd, "PCIe width %u (x16 HFI)\n", dd->lbus_width);
+ dd_dev_info(dd, "%s\n", dd->lbus_info);
return 0;
}
@@ -436,23 +428,18 @@ void hfi1_enable_intx(struct pci_dev *pdev)
void restore_pci_variables(struct hfi1_devdata *dd)
{
pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
- pci_write_config_dword(dd->pcidev,
- PCI_BASE_ADDRESS_0, dd->pcibar0);
- pci_write_config_dword(dd->pcidev,
- PCI_BASE_ADDRESS_1, dd->pcibar1);
- pci_write_config_dword(dd->pcidev,
- PCI_ROM_ADDRESS, dd->pci_rom);
+ pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
+ pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
+ pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
- dd->pcie_devctl2);
+ dd->pcie_devctl2);
pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
- pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
- dd->pci_lnkctl3);
+ pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
}
-
/*
* BIOS may not set PCIe bus-utilization parameters for best performance.
* Check and optionally adjust them to maximize our throughput.
@@ -461,6 +448,10 @@ static int hfi1_pcie_caps;
module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
static void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
@@ -479,6 +470,12 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
}
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
+ /*
+ * The driver cannot perform the tuning if it does not have
+ * access to the upstream component.
+ */
+ if (!parent)
+ return;
if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n");
return;
@@ -532,6 +529,7 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
pcie_set_readrq(dd->pcidev, ep_mrrs);
}
}
+
/* End of PCIe capability tuning */
/*
@@ -746,21 +744,22 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div);
c_plus1 = eq[i][POST] / div;
pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
- eq_value(c_minus1, c0, c_plus1));
+ eq_value(c_minus1, c0, c_plus1));
/* check if these coefficients violate EQ rules */
pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
- &violation);
+ &violation);
if (violation
& PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
if (hit_error == 0) {
dd_dev_err(dd,
- "Gen3 EQ Table Coefficient rule violations\n");
+ "Gen3 EQ Table Coefficient rule violations\n");
dd_dev_err(dd, " prec attn post\n");
}
dd_dev_err(dd, " p%02d: %02x %02x %02x\n",
- i, (u32)eq[i][0], (u32)eq[i][1], (u32)eq[i][2]);
+ i, (u32)eq[i][0], (u32)eq[i][1],
+ (u32)eq[i][2]);
dd_dev_err(dd, " %02x %02x %02x\n",
- (u32)c_minus1, (u32)c0, (u32)c_plus1);
+ (u32)c_minus1, (u32)c0, (u32)c_plus1);
hit_error = 1;
}
}
@@ -772,7 +771,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
/*
* Steps to be done after the PCIe firmware is downloaded and
* before the SBR for the Pcie Gen3.
- * The hardware mutex is already being held.
+ * The SBus resource is already being held.
*/
static void pcie_post_steps(struct hfi1_devdata *dd)
{
@@ -815,8 +814,8 @@ static int trigger_sbr(struct hfi1_devdata *dd)
list_for_each_entry(pdev, &dev->bus->devices, bus_list)
if (pdev != dev) {
dd_dev_err(dd,
- "%s: another device is on the same bus\n",
- __func__);
+ "%s: another device is on the same bus\n",
+ __func__);
return -ENOTTY;
}
@@ -840,8 +839,8 @@ static void write_gasket_interrupt(struct hfi1_devdata *dd, int index,
u16 code, u16 data)
{
write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8),
- (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT)
- |((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
+ (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) |
+ ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
}
/*
@@ -851,14 +850,13 @@ static void arm_gasket_logic(struct hfi1_devdata *dd)
{
u64 reg;
- reg = (((u64)1 << dd->hfi1_id)
- << ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT)
- | ((u64)pcie_serdes_broadcast[dd->hfi1_id]
- << ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT
- | ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK
- | ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK)
- << ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT
- );
+ reg = (((u64)1 << dd->hfi1_id) <<
+ ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) |
+ ((u64)pcie_serdes_broadcast[dd->hfi1_id] <<
+ ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT |
+ ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK |
+ ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) <<
+ ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT);
write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg);
/* read back to push the write */
read_csr(dd, ASIC_PCIE_SD_HOST_CMD);
@@ -946,7 +944,7 @@ static void write_xmt_margin(struct hfi1_devdata *dd, const char *fname)
*/
int do_pcie_gen3_transition(struct hfi1_devdata *dd)
{
- struct pci_dev *parent;
+ struct pci_dev *parent = dd->pcidev->bus->self;
u64 fw_ctrl;
u64 reg, therm;
u32 reg32, fs, lf;
@@ -955,8 +953,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
int do_retry, retry_count = 0;
uint default_pset;
u16 target_vector, target_speed;
- u16 lnkctl, lnkctl2, vendor;
- u8 nsbr = 1;
+ u16 lnkctl2, vendor;
u8 div;
const u8 (*eq)[3];
int return_error = 0;
@@ -983,17 +980,21 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
/* if already at target speed, done (unless forced) */
if (dd->lbus_speed == target_speed) {
dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__,
- pcie_target,
- pcie_force ? "re-doing anyway" : "skipping");
+ pcie_target,
+ pcie_force ? "re-doing anyway" : "skipping");
if (!pcie_force)
return 0;
}
/*
- * A0 needs an additional SBR
+ * The driver cannot do the transition if it has no access to the
+ * upstream component
*/
- if (is_ax(dd))
- nsbr++;
+ if (!parent) {
+ dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n",
+ __func__);
+ return 0;
+ }
/*
* Do the Gen3 transition. Steps are those of the PCIe Gen3
@@ -1009,10 +1010,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
goto done_no_mutex;
}
- /* hold the HW mutex across the firmware download and SBR */
- ret = acquire_hw_mutex(dd);
- if (ret)
+ /* hold the SBus resource across the firmware download and SBR */
+ ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+ if (ret) {
+ dd_dev_err(dd, "%s: unable to acquire SBus resource\n",
+ __func__);
return ret;
+ }
/* make sure thermal polling is not causing interrupts */
therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN);
@@ -1030,8 +1034,11 @@ retry:
/* step 4: download PCIe Gen3 SerDes firmware */
dd_dev_info(dd, "%s: downloading firmware\n", __func__);
ret = load_pcie_firmware(dd);
- if (ret)
+ if (ret) {
+ /* do not proceed if the firmware cannot be downloaded */
+ return_error = 1;
goto done;
+ }
/* step 5: set up device parameter settings */
dd_dev_info(dd, "%s: setting PCIe registers\n", __func__);
@@ -1091,8 +1098,10 @@ retry:
default_pset = DEFAULT_MCP_PSET;
}
pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
- (fs << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT)
- | (lf << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
+ (fs <<
+ PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) |
+ (lf <<
+ PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
ret = load_eq_table(dd, eq, fs, div);
if (ret)
goto done;
@@ -1106,15 +1115,15 @@ retry:
pcie_pset = default_pset;
if (pcie_pset > 10) { /* valid range is 0-10, inclusive */
dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
- __func__, pcie_pset, default_pset);
+ __func__, pcie_pset, default_pset);
pcie_pset = default_pset;
}
dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset);
pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
- ((1 << pcie_pset)
- << PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT)
- | PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK
- | PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
+ ((1 << pcie_pset) <<
+ PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
+ PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
+ PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
/*
* step 5b: Do post firmware download steps via SBus
@@ -1142,11 +1151,12 @@ retry:
*/
write_xmt_margin(dd, __func__);
- /* step 5e: disable active state power management (ASPM) */
+ /*
+ * step 5e: disable active state power management (ASPM). It
+ * will be enabled if required later
+ */
dd_dev_info(dd, "%s: clearing ASPM\n", __func__);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &lnkctl);
- lnkctl &= ~PCI_EXP_LNKCTL_ASPMC;
- pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, lnkctl);
+ aspm_hw_disable_l1(dd);
/*
* step 5f: clear DirectSpeedChange
@@ -1165,16 +1175,15 @@ retry:
* that it is Gen3 capable earlier.
*/
dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
- parent = dd->pcidev->bus->self;
pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
+ (u32)lnkctl2);
/* only write to parent if target is not as high as ours */
if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
+ (u32)lnkctl2);
pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
} else {
dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
@@ -1183,17 +1192,17 @@ retry:
dd_dev_info(dd, "%s: setting target link speed\n", __func__);
pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
+ (u32)lnkctl2);
lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
+ (u32)lnkctl2);
pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
/* step 5h: arm gasket logic */
/* hold DC in reset across the SBR */
write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
- (void) read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
+ (void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
/* save firmware control across the SBR */
fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL);
@@ -1224,8 +1233,8 @@ retry:
ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor);
if (ret) {
dd_dev_info(dd,
- "%s: read of VendorID failed after SBR, err %d\n",
- __func__, ret);
+ "%s: read of VendorID failed after SBR, err %d\n",
+ __func__, ret);
return_error = 1;
goto done;
}
@@ -1265,8 +1274,7 @@ retry:
write_csr(dd, CCE_DC_CTRL, 0);
/* Set the LED off */
- if (is_ax(dd))
- setextled(dd, 0);
+ setextled(dd, 0);
/* check for any per-lane errors */
pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
@@ -1277,8 +1285,8 @@ retry:
& ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK;
if ((status & (1 << dd->hfi1_id)) == 0) {
dd_dev_err(dd,
- "%s: gasket status 0x%x, expecting 0x%x\n",
- __func__, status, 1 << dd->hfi1_id);
+ "%s: gasket status 0x%x, expecting 0x%x\n",
+ __func__, status, 1 << dd->hfi1_id);
ret = -EIO;
goto done;
}
@@ -1295,13 +1303,13 @@ retry:
/* update our link information cache */
update_lbus_info(dd);
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
- dd->lbus_info);
+ dd->lbus_info);
if (dd->lbus_speed != target_speed) { /* not target */
/* maybe retry */
do_retry = retry_count < pcie_retry;
dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
- pcie_target, do_retry ? ", retrying" : "");
+ pcie_target, do_retry ? ", retrying" : "");
retry_count++;
if (do_retry) {
msleep(100); /* allow time to settle */
@@ -1317,7 +1325,7 @@ done:
dd_dev_info(dd, "%s: Re-enable therm polling\n",
__func__);
}
- release_hw_mutex(dd);
+ release_chip_resource(dd, CR_SBUS);
done_no_mutex:
/* return no error if it is OK to be at current speed */
if (ret && !return_error) {
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c
index b51a4416312b..c6849ce9e5eb 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/staging/rdma/hfi1/pio.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -101,7 +98,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
/* Fall through */
case PSC_DATA_VL_ENABLE:
/* Disallow sending on VLs not enabled */
- mask = (((~0ull)<<num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK)<<
+ mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
SEND_CTRL_UNSUPPORTED_VL_SHIFT;
reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
break;
@@ -130,7 +127,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
if (write) {
write_csr(dd, SEND_CTRL, reg);
if (flush)
- (void) read_csr(dd, SEND_CTRL); /* flush write */
+ (void)read_csr(dd, SEND_CTRL); /* flush write */
}
spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
@@ -177,8 +174,10 @@ static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = {
/* memory pool information, used when calculating final sizes */
struct mem_pool_info {
- int centipercent; /* 100th of 1% of memory to use, -1 if blocks
- already set */
+ int centipercent; /*
+ * 100th of 1% of memory to use, -1 if blocks
+ * already set
+ */
int count; /* count of contexts in the pool */
int blocks; /* block size of the pool */
int size; /* context size, in blocks */
@@ -312,7 +311,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
if (i == SC_ACK) {
count = dd->n_krcv_queues;
} else if (i == SC_KERNEL) {
- count = num_vls + 1 /* VL15 */;
+ count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
} else if (count == SCC_PER_CPU) {
count = dd->num_rcv_contexts - dd->n_krcv_queues;
} else if (count < 0) {
@@ -509,7 +508,7 @@ static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context)
sci = &dd->send_contexts[sw_index];
if (!sci->allocated) {
dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
- __func__, sw_index, hw_context);
+ __func__, sw_index, hw_context);
}
sci->allocated = 0;
dd->hw_to_sw[hw_context] = INVALID_SCI;
@@ -625,7 +624,7 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
& SC(CREDIT_CTRL_THRESHOLD_MASK))
<< SC(CREDIT_CTRL_THRESHOLD_SHIFT));
write_kctxt_csr(sc->dd, sc->hw_context,
- SC(CREDIT_CTRL), sc->credit_ctrl);
+ SC(CREDIT_CTRL), sc->credit_ctrl);
/* force a credit return on change to avoid a possible stall */
force_return = 1;
@@ -700,7 +699,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
if (dd->flags & HFI1_FROZEN)
return NULL;
- sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa);
+ sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
if (!sc)
return NULL;
@@ -763,9 +762,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
/* set the default partition key */
write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
- (DEFAULT_PKEY &
- SC(CHECK_PARTITION_KEY_VALUE_MASK))
- << SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
+ (SC(CHECK_PARTITION_KEY_VALUE_MASK) &
+ DEFAULT_PKEY) <<
+ SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
/* per context type checks */
if (type == SC_USER) {
@@ -778,8 +777,8 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
/* set the send context check opcode mask and value */
write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
- ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
- ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
+ ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
+ ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
/* set up credit return */
reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
@@ -797,7 +796,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
thresh = sc_percent_to_threshold(sc, 50);
} else if (type == SC_USER) {
thresh = sc_percent_to_threshold(sc,
- user_credit_return_threshold);
+ user_credit_return_threshold);
} else { /* kernel */
thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
}
@@ -852,7 +851,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->credit_ctrl,
thresh);
-
return sc;
}
@@ -971,11 +969,11 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
if (loop > 500) {
/* timed out - bounce the link */
dd_dev_err(dd,
- "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
- __func__, sc->sw_index,
- sc->hw_context, (u32)reg);
+ "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
+ __func__, sc->sw_index,
+ sc->hw_context, (u32)reg);
queue_work(dd->pport->hfi1_wq,
- &dd->pport->link_bounce_work);
+ &dd->pport->link_bounce_work);
break;
}
loop++;
@@ -1021,7 +1019,7 @@ int sc_restart(struct send_context *sc)
return -EINVAL;
dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
- sc->hw_context);
+ sc->hw_context);
/*
* Step 1: Wait for the context to actually halt.
@@ -1036,7 +1034,7 @@ int sc_restart(struct send_context *sc)
break;
if (loop > 100) {
dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
- __func__, sc->sw_index, sc->hw_context);
+ __func__, sc->sw_index, sc->hw_context);
return -ETIME;
}
loop++;
@@ -1062,9 +1060,9 @@ int sc_restart(struct send_context *sc)
break;
if (loop > 100) {
dd_dev_err(dd,
- "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
- __func__, sc->sw_index,
- sc->hw_context, count);
+ "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
+ __func__, sc->sw_index,
+ sc->hw_context, count);
}
loop++;
udelay(1);
@@ -1177,18 +1175,18 @@ void pio_reset_all(struct hfi1_devdata *dd)
if (ret == -EIO) {
/* clear the error */
write_csr(dd, SEND_PIO_ERR_CLEAR,
- SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
+ SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
}
/* reset init all */
write_csr(dd, SEND_PIO_INIT_CTXT,
- SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
+ SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
udelay(2);
ret = pio_init_wait_progress(dd);
if (ret < 0) {
dd_dev_err(dd,
- "PIO send context init %s while initializing all PIO blocks\n",
- ret == -ETIMEDOUT ? "is stuck" : "had an error");
+ "PIO send context init %s while initializing all PIO blocks\n",
+ ret == -ETIMEDOUT ? "is stuck" : "had an error");
}
}
@@ -1236,8 +1234,7 @@ int sc_enable(struct send_context *sc)
*/
reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
if (reg)
- write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR),
- reg);
+ write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);
/*
* The HW PIO initialization engine can handle only one init
@@ -1295,7 +1292,7 @@ void sc_return_credits(struct send_context *sc)
/* a 0->1 transition schedules a credit return */
write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
- SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
+ SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
/*
* Ensure that the write is flushed and the credit return is
* scheduled. We care more about the 0 -> 1 transition.
@@ -1321,7 +1318,7 @@ void sc_drop(struct send_context *sc)
return;
dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
- __func__, sc->sw_index, sc->hw_context);
+ __func__, sc->sw_index, sc->hw_context);
}
/*
@@ -1346,7 +1343,7 @@ void sc_stop(struct send_context *sc, int flag)
wake_up(&sc->halt_wait);
}
-#define BLOCK_DWORDS (PIO_BLOCK_SIZE/sizeof(u32))
+#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
#define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
/*
@@ -1430,8 +1427,10 @@ retry:
next = head + 1;
if (next >= sc->sr_size)
next = 0;
- /* update the head - must be last! - the releaser can look at fields
- in pbuf once we move the head */
+ /*
+ * update the head - must be last! - the releaser can look at fields
+ * in pbuf once we move the head
+ */
smp_wmb();
sc->sr_head = next;
spin_unlock_irqrestore(&sc->alloc_lock, flags);
@@ -1469,7 +1468,7 @@ void sc_add_credit_return_intr(struct send_context *sc)
if (sc->credit_intr_count == 0) {
sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
write_kctxt_csr(sc->dd, sc->hw_context,
- SC(CREDIT_CTRL), sc->credit_ctrl);
+ SC(CREDIT_CTRL), sc->credit_ctrl);
}
sc->credit_intr_count++;
spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
@@ -1491,7 +1490,7 @@ void sc_del_credit_return_intr(struct send_context *sc)
if (sc->credit_intr_count == 0) {
sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
write_kctxt_csr(sc->dd, sc->hw_context,
- SC(CREDIT_CTRL), sc->credit_ctrl);
+ SC(CREDIT_CTRL), sc->credit_ctrl);
}
spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
}
@@ -1526,8 +1525,9 @@ static void sc_piobufavail(struct send_context *sc)
struct hfi1_devdata *dd = sc->dd;
struct hfi1_ibdev *dev = &dd->verbs_dev;
struct list_head *list;
- struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE];
- struct hfi1_qp *qp;
+ struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
+ struct rvt_qp *qp;
+ struct hfi1_qp_priv *priv;
unsigned long flags;
unsigned i, n = 0;
@@ -1545,24 +1545,28 @@ static void sc_piobufavail(struct send_context *sc)
struct iowait *wait;
if (n == ARRAY_SIZE(qps))
- goto full;
+ break;
wait = list_first_entry(list, struct iowait, list);
- qp = container_of(wait, struct hfi1_qp, s_iowait);
- list_del_init(&qp->s_iowait.list);
+ qp = iowait_to_qp(wait);
+ priv = qp->priv;
+ list_del_init(&priv->s_iowait.list);
/* refcount held until actual wake up */
qps[n++] = qp;
}
/*
- * Counting: only call wantpiobuf_intr() if there were waiters and they
- * are now all gone.
+ * If there had been waiters and there are more
+ * insure that we redo the force to avoid a potential hang.
*/
- if (n)
+ if (n) {
hfi1_sc_wantpiobuf_intr(sc, 0);
-full:
+ if (!list_empty(list))
+ hfi1_sc_wantpiobuf_intr(sc, 1);
+ }
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
for (i = 0; i < n; i++)
- hfi1_qp_wakeup(qps[i], HFI1_S_WAIT_PIO);
+ hfi1_qp_wakeup(qps[i],
+ RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
}
/* translate a send credit update to a bit code of reasons */
@@ -1661,7 +1665,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
sw_index = dd->hw_to_sw[hw_context];
if (unlikely(sw_index >= dd->num_send_contexts)) {
dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
- __func__, hw_context, sw_index);
+ __func__, hw_context, sw_index);
goto done;
}
sc = dd->send_contexts[sw_index].sc;
@@ -1674,8 +1678,8 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
sw_index = dd->hw_to_sw[gc];
if (unlikely(sw_index >= dd->num_send_contexts)) {
dd_dev_err(dd,
- "%s: invalid hw (%u) to sw (%u) mapping\n",
- __func__, hw_context, sw_index);
+ "%s: invalid hw (%u) to sw (%u) mapping\n",
+ __func__, hw_context, sw_index);
continue;
}
sc_release_update(dd->send_contexts[sw_index].sc);
@@ -1684,11 +1688,217 @@ done:
spin_unlock(&dd->sc_lock);
}
+/*
+ * pio_select_send_context_vl() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns a send context based on the selector and a vl.
+ * The mapping fields are protected by RCU
+ */
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+ u32 selector, u8 vl)
+{
+ struct pio_vl_map *m;
+ struct pio_map_elem *e;
+ struct send_context *rval;
+
+ /*
+ * NOTE This should only happen if SC->VL changed after the initial
+ * checks on the QP/AH
+ * Default will return VL0's send context below
+ */
+ if (unlikely(vl >= num_vls)) {
+ rval = NULL;
+ goto done;
+ }
+
+ rcu_read_lock();
+ m = rcu_dereference(dd->pio_map);
+ if (unlikely(!m)) {
+ rcu_read_unlock();
+ return dd->vld[0].sc;
+ }
+ e = m->map[vl & m->mask];
+ rval = e->ksc[selector & e->mask];
+ rcu_read_unlock();
+
+done:
+ rval = !rval ? dd->vld[0].sc : rval;
+ return rval;
+}
+
+/*
+ * pio_select_send_context_sc() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @sc5: the 5 bit sc
+ *
+ * This function returns an send context based on the selector and an sc
+ */
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+ u32 selector, u8 sc5)
+{
+ u8 vl = sc_to_vlt(dd, sc5);
+
+ return pio_select_send_context_vl(dd, selector, vl);
+}
+
+/*
+ * Free the indicated map struct
+ */
+static void pio_map_free(struct pio_vl_map *m)
+{
+ int i;
+
+ for (i = 0; m && i < m->actual_vls; i++)
+ kfree(m->map[i]);
+ kfree(m);
+}
+
+/*
+ * Handle RCU callback
+ */
+static void pio_map_rcu_callback(struct rcu_head *list)
+{
+ struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);
+
+ pio_map_free(m);
+}
+
+/*
+ * pio_map_init - called when #vls change
+ * @dd: hfi1_devdata
+ * @port: port number
+ * @num_vls: number of vls
+ * @vl_scontexts: per vl send context mapping (optional)
+ *
+ * This routine changes the mapping based on the number of vls.
+ *
+ * vl_scontexts is used to specify a non-uniform vl/send context
+ * loading. NULL implies auto computing the loading and giving each
+ * VL an uniform distribution of send contexts per VL.
+ *
+ * The auto algorithm computers the sc_per_vl and the number of extra
+ * send contexts. Any extra send contexts are added from the last VL
+ * on down
+ *
+ * rcu locking is used here to control access to the mapping fields.
+ *
+ * If either the num_vls or num_send_contexts are non-power of 2, the
+ * array sizes in the struct pio_vl_map and the struct pio_map_elem are
+ * rounded up to the next highest power of 2 and the first entry is
+ * reused in a round robin fashion.
+ *
+ * If an error occurs the map change is not done and the mapping is not
+ * chaged.
+ *
+ */
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
+{
+ int i, j;
+ int extra, sc_per_vl;
+ int scontext = 1;
+ int num_kernel_send_contexts = 0;
+ u8 lvl_scontexts[OPA_MAX_VLS];
+ struct pio_vl_map *oldmap, *newmap;
+
+ if (!vl_scontexts) {
+ /* send context 0 reserved for VL15 */
+ for (i = 1; i < dd->num_send_contexts; i++)
+ if (dd->send_contexts[i].type == SC_KERNEL)
+ num_kernel_send_contexts++;
+ /* truncate divide */
+ sc_per_vl = num_kernel_send_contexts / num_vls;
+ /* extras */
+ extra = num_kernel_send_contexts % num_vls;
+ vl_scontexts = lvl_scontexts;
+ /* add extras from last vl down */
+ for (i = num_vls - 1; i >= 0; i--, extra--)
+ vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
+ }
+ /* build new map */
+ newmap = kzalloc(sizeof(*newmap) +
+ roundup_pow_of_two(num_vls) *
+ sizeof(struct pio_map_elem *),
+ GFP_KERNEL);
+ if (!newmap)
+ goto bail;
+ newmap->actual_vls = num_vls;
+ newmap->vls = roundup_pow_of_two(num_vls);
+ newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+ for (i = 0; i < newmap->vls; i++) {
+ /* save for wrap around */
+ int first_scontext = scontext;
+
+ if (i < newmap->actual_vls) {
+ int sz = roundup_pow_of_two(vl_scontexts[i]);
+
+ /* only allocate once */
+ newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
+ sz * sizeof(struct
+ send_context *),
+ GFP_KERNEL);
+ if (!newmap->map[i])
+ goto bail;
+ newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
+ /* assign send contexts */
+ for (j = 0; j < sz; j++) {
+ if (dd->kernel_send_context[scontext])
+ newmap->map[i]->ksc[j] =
+ dd->kernel_send_context[scontext];
+ if (++scontext >= first_scontext +
+ vl_scontexts[i])
+ /* wrap back to first send context */
+ scontext = first_scontext;
+ }
+ } else {
+ /* just re-use entry without allocating */
+ newmap->map[i] = newmap->map[i % num_vls];
+ }
+ scontext = first_scontext + vl_scontexts[i];
+ }
+ /* newmap in hand, save old map */
+ spin_lock_irq(&dd->pio_map_lock);
+ oldmap = rcu_dereference_protected(dd->pio_map,
+ lockdep_is_held(&dd->pio_map_lock));
+
+ /* publish newmap */
+ rcu_assign_pointer(dd->pio_map, newmap);
+
+ spin_unlock_irq(&dd->pio_map_lock);
+ /* success, free any old map after grace period */
+ if (oldmap)
+ call_rcu(&oldmap->list, pio_map_rcu_callback);
+ return 0;
+bail:
+ /* free any partial allocation */
+ pio_map_free(newmap);
+ return -ENOMEM;
+}
+
+void free_pio_map(struct hfi1_devdata *dd)
+{
+ /* Free PIO map if allocated */
+ if (rcu_access_pointer(dd->pio_map)) {
+ spin_lock_irq(&dd->pio_map_lock);
+ pio_map_free(rcu_access_pointer(dd->pio_map));
+ RCU_INIT_POINTER(dd->pio_map, NULL);
+ spin_unlock_irq(&dd->pio_map_lock);
+ synchronize_rcu();
+ }
+ kfree(dd->kernel_send_context);
+ dd->kernel_send_context = NULL;
+}
+
int init_pervl_scs(struct hfi1_devdata *dd)
{
int i;
- u64 mask, all_vl_mask = (u64) 0x80ff; /* VLs 0-7, 15 */
+ u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */
+ u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */
u32 ctxt;
+ struct hfi1_pportdata *ppd = dd->pport;
dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
dd->rcd[0]->rcvhdrqentsize, dd->node);
@@ -1696,6 +1906,12 @@ int init_pervl_scs(struct hfi1_devdata *dd)
goto nomem;
hfi1_init_ctxt(dd->vld[15].sc);
dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
+
+ dd->kernel_send_context = kmalloc_node(dd->num_send_contexts *
+ sizeof(struct send_context *),
+ GFP_KERNEL, dd->node);
+ dd->kernel_send_context[0] = dd->vld[15].sc;
+
for (i = 0; i < num_vls; i++) {
/*
* Since this function does not deal with a specific
@@ -1708,12 +1924,19 @@ int init_pervl_scs(struct hfi1_devdata *dd)
dd->rcd[0]->rcvhdrqentsize, dd->node);
if (!dd->vld[i].sc)
goto nomem;
-
+ dd->kernel_send_context[i + 1] = dd->vld[i].sc;
hfi1_init_ctxt(dd->vld[i].sc);
-
/* non VL15 start with the max MTU */
dd->vld[i].mtu = hfi1_max_mtu;
}
+ for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+ dd->kernel_send_context[i + 1] =
+ sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
+ if (!dd->kernel_send_context[i + 1])
+ goto nomem;
+ hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
+ }
+
sc_enable(dd->vld[15].sc);
ctxt = dd->vld[15].sc->hw_context;
mask = all_vl_mask & ~(1LL << 15);
@@ -1721,17 +1944,29 @@ int init_pervl_scs(struct hfi1_devdata *dd)
dd_dev_info(dd,
"Using send context %u(%u) for VL15\n",
dd->vld[15].sc->sw_index, ctxt);
+
for (i = 0; i < num_vls; i++) {
sc_enable(dd->vld[i].sc);
ctxt = dd->vld[i].sc->hw_context;
- mask = all_vl_mask & ~(1LL << i);
+ mask = all_vl_mask & ~(data_vls_mask);
write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
}
+ for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+ sc_enable(dd->kernel_send_context[i + 1]);
+ ctxt = dd->kernel_send_context[i + 1]->hw_context;
+ mask = all_vl_mask & ~(data_vls_mask);
+ write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
+ }
+
+ if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
+ goto nomem;
return 0;
nomem:
sc_free(dd->vld[15].sc);
for (i = 0; i < num_vls; i++)
sc_free(dd->vld[i].sc);
+ for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
+ sc_free(dd->kernel_send_context[i + 1]);
return -ENOMEM;
}
@@ -1769,11 +2004,11 @@ int init_credit_return(struct hfi1_devdata *dd)
bytes,
&dd->cr_base[i].pa,
GFP_KERNEL);
- if (dd->cr_base[i].va == NULL) {
+ if (!dd->cr_base[i].va) {
set_dev_node(&dd->pcidev->dev, dd->node);
dd_dev_err(dd,
- "Unable to allocate credit return DMA range for NUMA %d\n",
- i);
+ "Unable to allocate credit return DMA range for NUMA %d\n",
+ i);
ret = -ENOMEM;
goto done;
}
@@ -1797,10 +2032,10 @@ void free_credit_return(struct hfi1_devdata *dd)
for (i = 0; i < num_numa; i++) {
if (dd->cr_base[i].va) {
dma_free_coherent(&dd->pcidev->dev,
- TXE_NUM_CONTEXTS
- * sizeof(struct credit_return),
- dd->cr_base[i].va,
- dd->cr_base[i].pa);
+ TXE_NUM_CONTEXTS *
+ sizeof(struct credit_return),
+ dd->cr_base[i].va,
+ dd->cr_base[i].pa);
}
}
kfree(dd->cr_base);
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h
index 53d3e0a79375..0026976ce4f6 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/staging/rdma/hfi1/pio.h
@@ -1,14 +1,13 @@
#ifndef _PIO_H
#define _PIO_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -50,7 +47,6 @@
*
*/
-
/* send context types */
#define SC_KERNEL 0
#define SC_ACK 1
@@ -106,6 +102,7 @@ struct send_context {
struct hfi1_devdata *dd; /* device */
void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
+
volatile __le64 *hw_free; /* HW free counter */
struct work_struct halt_work; /* halted context work queue entry */
unsigned long flags; /* flags */
@@ -165,6 +162,112 @@ struct sc_config_sizes {
short int count;
};
+/*
+ * The diagram below details the relationship of the mapping structures
+ *
+ * Since the mapping now allows for non-uniform send contexts per vl, the
+ * number of send contexts for a vl is either the vl_scontexts[vl] or
+ * a computation based on num_kernel_send_contexts/num_vls:
+ *
+ * For example:
+ * nactual = vl_scontexts ? vl_scontexts[vl] : num_kernel_send_contexts/num_vls
+ *
+ * n = roundup to next highest power of 2 using nactual
+ *
+ * In the case where there are num_kernel_send_contexts/num_vls doesn't divide
+ * evenly, the extras are added from the last vl downward.
+ *
+ * For the case where n > nactual, the send contexts are assigned
+ * in a round robin fashion wrapping back to the first send context
+ * for a particular vl.
+ *
+ * dd->pio_map
+ * | pio_map_elem[0]
+ * | +--------------------+
+ * v | mask |
+ * pio_vl_map |--------------------|
+ * +--------------------------+ | ksc[0] -> sc 1 |
+ * | list (RCU) | |--------------------|
+ * |--------------------------| ->| ksc[1] -> sc 2 |
+ * | mask | --/ |--------------------|
+ * |--------------------------| -/ | * |
+ * | actual_vls (max 8) | -/ |--------------------|
+ * |--------------------------| --/ | ksc[n] -> sc n |
+ * | vls (max 8) | -/ +--------------------+
+ * |--------------------------| --/
+ * | map[0] |-/
+ * |--------------------------| +--------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |--------------------|
+ * | * | \-- | ksc[0] -> sc 1+n |
+ * | * | \---- |--------------------|
+ * | * | \->| ksc[1] -> sc 2+n |
+ * |--------------------------| |--------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |--------------------|
+ * \- | ksc[m] -> sc m+n |
+ * \ +--------------------+
+ * \-
+ * \
+ * \- +--------------------+
+ * \- | mask |
+ * \ |--------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |--------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |--------------------|
+ * | * |
+ * |--------------------|
+ * | ksc[o] -> sc o+m+n |
+ * +--------------------+
+ *
+ */
+
+/* Initial number of send contexts per VL */
+#define INIT_SC_PER_VL 2
+
+/*
+ * struct pio_map_elem - mapping for a vl
+ * @mask - selector mask
+ * @ksc - array of kernel send contexts for this vl
+ *
+ * The mask is used to "mod" the selector to
+ * produce index into the trailing array of
+ * kscs
+ */
+struct pio_map_elem {
+ u32 mask;
+ struct send_context *ksc[0];
+};
+
+/*
+ * struct pio_vl_map - mapping for a vl
+ * @list - rcu head for free callback
+ * @mask - vl mask to "mod" the vl to produce an index to map array
+ * @actual_vls - number of vls
+ * @vls - numbers of vls rounded to next power of 2
+ * @map - array of pio_map_elem entries
+ *
+ * This is the parent mapping structure. The trailing members of the
+ * struct point to pio_map_elem entries, which in turn point to an
+ * array of kscs for that vl.
+ */
+struct pio_vl_map {
+ struct rcu_head list;
+ u32 mask;
+ u8 actual_vls;
+ u8 vls;
+ struct pio_map_elem *map[0];
+};
+
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
+ u8 *vl_scontexts);
+void free_pio_map(struct hfi1_devdata *dd);
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+ u32 selector, u8 vl);
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+ u32 selector, u8 sc5);
+
/* send context functions */
int init_credit_return(struct hfi1_devdata *dd);
void free_credit_return(struct hfi1_devdata *dd);
@@ -183,7 +286,7 @@ void sc_flush(struct send_context *sc);
void sc_drop(struct send_context *sc);
void sc_stop(struct send_context *sc, int bit);
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
- pio_release_cb cb, void *arg);
+ pio_release_cb cb, void *arg);
void sc_release_update(struct send_context *sc);
void sc_return_credits(struct send_context *sc);
void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
@@ -212,12 +315,11 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd);
void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl);
void pio_send_control(struct hfi1_devdata *dd, int op);
-
/* PIO copy routines */
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
const void *from, size_t count);
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
- const void *from, size_t nbytes);
+ const void *from, size_t nbytes);
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
void seg_pio_copy_end(struct pio_buf *pbuf);
diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c
index 64bef6c26653..8c25e1b58849 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/staging/rdma/hfi1/pio_copy.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -52,9 +49,9 @@
/* additive distance between non-SOP and SOP space */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
-#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
+#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
/* number of QUADWORDs in a block */
-#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
+#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
/**
* pio_copy - copy data block to MMIO space
@@ -83,11 +80,13 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
/* calculate where the QWORD data ends - in SOP=1 space */
- dend = dest + ((count>>1) * sizeof(u64));
+ dend = dest + ((count >> 1) * sizeof(u64));
if (dend < send) {
- /* all QWORD data is within the SOP block, does *not*
- reach the end of the SOP block */
+ /*
+ * all QWORD data is within the SOP block, does *not*
+ * reach the end of the SOP block
+ */
while (dest < dend) {
writeq(*(u64 *)from, dest);
@@ -152,8 +151,10 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
writeq(val.val64, dest);
dest += sizeof(u64);
}
- /* fill in rest of block, no need to check pbuf->end
- as we only wrap on a block boundary */
+ /*
+ * fill in rest of block, no need to check pbuf->end
+ * as we only wrap on a block boundary
+ */
while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
writeq(0, dest);
dest += sizeof(u64);
@@ -177,7 +178,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
* "zero" shift - bit shift used to zero out upper bytes. Input is
* the count of LSB bytes to preserve.
*/
-#define zshift(x) (8 * (8-(x)))
+#define zshift(x) (8 * (8 - (x)))
/*
* "merge" shift - bit shift used to merge with carry bytes. Input is
@@ -196,7 +197,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
* o nbytes must not span a QW boundary
*/
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
+ unsigned int nbytes)
{
unsigned long off;
@@ -223,7 +224,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
* o nbytes may span a QW boundary
*/
static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
+ const void *from, unsigned int nbytes)
{
unsigned long off = (unsigned long)from & 0x7;
unsigned int room, xbytes;
@@ -244,7 +245,7 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
pbuf->carry.val64 |= (((*(u64 *)from)
>> mshift(off))
<< zshift(xbytes))
- >> zshift(xbytes+pbuf->carry_bytes);
+ >> zshift(xbytes + pbuf->carry_bytes);
off = 0;
pbuf->carry_bytes += xbytes;
nbytes -= xbytes;
@@ -362,7 +363,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
* o from may _not_ be u64 aligned.
*/
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
+ unsigned int nbytes)
{
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
@@ -377,7 +378,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
* o nbytes may span a QW boundary
*/
static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
+ const void *from, unsigned int nbytes)
{
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
pbuf->carry_bytes += nbytes;
@@ -411,7 +412,7 @@ static inline void merge_write8(
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
writeq(pbuf->carry.val64, dest);
- jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
+ jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
}
/*
@@ -433,7 +434,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest)
u64 zero = 0;
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
- 8 - pbuf->carry_bytes);
+ 8 - pbuf->carry_bytes);
writeq(pbuf->carry.val64, dest);
return 1;
}
@@ -453,7 +454,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest)
* @nbytes: bytes to copy
*/
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
- const void *from, size_t nbytes)
+ const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + SOP_DISTANCE;
void __iomem *send = dest + PIO_BLOCK_SIZE;
@@ -463,11 +464,13 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
/* calculate where the QWORD data ends - in SOP=1 space */
- dend = dest + ((nbytes>>3) * sizeof(u64));
+ dend = dest + ((nbytes >> 3) * sizeof(u64));
if (dend < send) {
- /* all QWORD data is within the SOP block, does *not*
- reach the end of the SOP block */
+ /*
+ * all QWORD data is within the SOP block, does *not*
+ * reach the end of the SOP block
+ */
while (dest < dend) {
writeq(*(u64 *)from, dest);
@@ -562,8 +565,10 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
void __iomem *send; /* SOP end */
void __iomem *xend;
- /* calculate the end of data or end of block, whichever
- comes first */
+ /*
+ * calculate the end of data or end of block, whichever
+ * comes first
+ */
send = pbuf->start + PIO_BLOCK_SIZE;
xend = min(send, dend);
@@ -639,13 +644,13 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
* Must handle nbytes < 8.
*/
static void mid_copy_straight(struct pio_buf *pbuf,
- const void *from, size_t nbytes)
+ const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
/* calculate 8-byte data end */
- dend = dest + ((nbytes>>3) * sizeof(u64));
+ dend = dest + ((nbytes >> 3) * sizeof(u64));
if (pbuf->qw_written < PIO_BLOCK_QWS) {
/*
@@ -656,8 +661,10 @@ static void mid_copy_straight(struct pio_buf *pbuf,
void __iomem *send; /* SOP end */
void __iomem *xend;
- /* calculate the end of data or end of block, whichever
- comes first */
+ /*
+ * calculate the end of data or end of block, whichever
+ * comes first
+ */
send = pbuf->start + PIO_BLOCK_SIZE;
xend = min(send, dend);
@@ -713,7 +720,7 @@ static void mid_copy_straight(struct pio_buf *pbuf,
/* we know carry_bytes was zero on entry to this routine */
read_low_bytes(pbuf, from, nbytes & 0x7);
- pbuf->qw_written += nbytes>>3;
+ pbuf->qw_written += nbytes >> 3;
}
/*
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c
new file mode 100644
index 000000000000..0a1d074583e4
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/platform.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "efivar.h"
+
+void get_platform_config(struct hfi1_devdata *dd)
+{
+ int ret = 0;
+ unsigned long size = 0;
+ u8 *temp_platform_config = NULL;
+
+ ret = read_hfi1_efi_var(dd, "configuration", &size,
+ (void **)&temp_platform_config);
+ if (ret) {
+ dd_dev_info(dd,
+ "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+ __func__);
+ /* fall back to request firmware */
+ platform_config_load = 1;
+ goto bail;
+ }
+
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = size;
+
+bail:
+ /* exit */;
+}
+
+void free_platform_config(struct hfi1_devdata *dd)
+{
+ if (!platform_config_load) {
+ /*
+ * was loaded from EFI, release memory
+ * allocated by read_efi_var
+ */
+ kfree(dd->platform_config.data);
+ }
+ /*
+ * else do nothing, dispose_firmware will release
+ * struct firmware platform_config on driver exit
+ */
+}
+
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
+{
+ u8 tx_ctrl_byte = on ? 0x0 : 0xF;
+ int ret = 0;
+
+ ret = qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_TX_CTRL_BYTE_OFFS,
+ &tx_ctrl_byte, 1);
+ /* we expected 1, so consider 0 an error */
+ if (ret == 0)
+ ret = -EIO;
+ else if (ret == 1)
+ ret = 0;
+ return ret;
+}
+
+static int qual_power(struct hfi1_pportdata *ppd)
+{
+ u32 cable_power_class = 0, power_class_max = 0;
+ u8 *cache = ppd->qsfp_info.cache;
+ int ret = 0;
+
+ ret = get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+ SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, &power_class_max, 4);
+ if (ret)
+ return ret;
+
+ if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+ cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+ else
+ cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+ if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1))
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+ else if (cable_power_class > 4 && cable_power_class > (power_class_max))
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+ /*
+ * cable_power_class will never have value 4 as this simply
+ * means the high power settings are unused
+ */
+
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: Port disabled due to system power restrictions\n",
+ __func__);
+ ret = -EPERM;
+ }
+ return ret;
+}
+
+static int qual_bitrate(struct hfi1_pportdata *ppd)
+{
+ u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+ u8 *cache = ppd->qsfp_info.cache;
+
+ if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G) &&
+ cache[QSFP_NOM_BIT_RATE_250_OFFS] < 0x64)
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+ if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G) &&
+ cache[QSFP_NOM_BIT_RATE_100_OFFS] < 0x7D)
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: Cable failed bitrate check, disabling port\n",
+ __func__);
+ return -EPERM;
+ }
+ return 0;
+}
+
+static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
+{
+ u8 cable_power_class = 0, power_ctrl_byte = 0;
+ u8 *cache = ppd->qsfp_info.cache;
+ int ret;
+
+ if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+ cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+ else
+ cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+ if (cable_power_class) {
+ power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
+
+ power_ctrl_byte |= 1;
+ power_ctrl_byte &= ~(0x2);
+
+ ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+ QSFP_PWR_CTRL_BYTE_OFFS,
+ &power_ctrl_byte, 1);
+ if (ret != 1)
+ return -EIO;
+
+ if (cable_power_class > 3) {
+ /* > power class 4*/
+ power_ctrl_byte |= (1 << 2);
+ ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+ QSFP_PWR_CTRL_BYTE_OFFS,
+ &power_ctrl_byte, 1);
+ if (ret != 1)
+ return -EIO;
+ }
+
+ /* SFF 8679 rev 1.7 LPMode Deassert time */
+ msleep(300);
+ }
+ return 0;
+}
+
+static void apply_rx_cdr(struct hfi1_pportdata *ppd,
+ u32 rx_preset_index,
+ u8 *cdr_ctrl_byte)
+{
+ u32 rx_preset;
+ u8 *cache = ppd->qsfp_info.cache;
+
+ if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
+ (cache[QSFP_CDR_INFO_OFFS] & 0x40)))
+ return;
+
+ /* rx_preset preset to zero to catch error */
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+ rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
+ &rx_preset, 4);
+
+ if (!rx_preset) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: RX_CDR_APPLY is set to disabled\n",
+ __func__);
+ return;
+ }
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+ rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR,
+ &rx_preset, 4);
+
+ /* Expand cdr setting to all 4 lanes */
+ rx_preset = (rx_preset | (rx_preset << 1) |
+ (rx_preset << 2) | (rx_preset << 3));
+
+ if (rx_preset) {
+ *cdr_ctrl_byte |= rx_preset;
+ } else {
+ *cdr_ctrl_byte &= rx_preset;
+ /* Preserve current TX CDR status */
+ *cdr_ctrl_byte |= (cache[QSFP_CDR_CTRL_BYTE_OFFS] & 0xF0);
+ }
+}
+
+static void apply_tx_cdr(struct hfi1_pportdata *ppd,
+ u32 tx_preset_index,
+ u8 *ctr_ctrl_byte)
+{
+ u32 tx_preset;
+ u8 *cache = ppd->qsfp_info.cache;
+
+ if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
+ (cache[QSFP_CDR_INFO_OFFS] & 0x80)))
+ return;
+
+ get_platform_config_field(
+ ppd->dd,
+ PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+ TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, &tx_preset, 4);
+
+ if (!tx_preset) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: TX_CDR_APPLY is set to disabled\n",
+ __func__);
+ return;
+ }
+ get_platform_config_field(
+ ppd->dd,
+ PLATFORM_CONFIG_TX_PRESET_TABLE,
+ tx_preset_index,
+ TX_PRESET_TABLE_QSFP_TX_CDR, &tx_preset, 4);
+
+ /* Expand cdr setting to all 4 lanes */
+ tx_preset = (tx_preset | (tx_preset << 1) |
+ (tx_preset << 2) | (tx_preset << 3));
+
+ if (tx_preset)
+ *ctr_ctrl_byte |= (tx_preset << 4);
+ else
+ /* Preserve current/determined RX CDR status */
+ *ctr_ctrl_byte &= ((tx_preset << 4) | 0xF);
+}
+
+static void apply_cdr_settings(
+ struct hfi1_pportdata *ppd, u32 rx_preset_index,
+ u32 tx_preset_index)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+ u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
+
+ apply_rx_cdr(ppd, rx_preset_index, &cdr_ctrl_byte);
+
+ apply_tx_cdr(ppd, tx_preset_index, &cdr_ctrl_byte);
+
+ qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+ &cdr_ctrl_byte, 1);
+}
+
+static void apply_tx_eq_auto(struct hfi1_pportdata *ppd)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+ u8 tx_eq;
+
+ if (!(cache[QSFP_EQ_INFO_OFFS] & 0x8))
+ return;
+ /* Disable adaptive TX EQ if present */
+ tx_eq = cache[(128 * 3) + 241];
+ tx_eq &= 0xF0;
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 241, &tx_eq, 1);
+}
+
+static void apply_tx_eq_prog(struct hfi1_pportdata *ppd, u32 tx_preset_index)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+ u32 tx_preset;
+ u8 tx_eq;
+
+ if (!(cache[QSFP_EQ_INFO_OFFS] & 0x4))
+ return;
+
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+ tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ_APPLY,
+ &tx_preset, 4);
+ if (!tx_preset) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: TX_EQ_APPLY is set to disabled\n",
+ __func__);
+ return;
+ }
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+ tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ,
+ &tx_preset, 4);
+
+ if (((cache[(128 * 3) + 224] & 0xF0) >> 4) < tx_preset) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: TX EQ %x unsupported\n",
+ __func__, tx_preset);
+
+ dd_dev_info(
+ ppd->dd,
+ "%s: Applying EQ %x\n",
+ __func__, cache[608] & 0xF0);
+
+ tx_preset = (cache[608] & 0xF0) >> 4;
+ }
+
+ tx_eq = tx_preset | (tx_preset << 4);
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 234, &tx_eq, 1);
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 235, &tx_eq, 1);
+}
+
+static void apply_rx_eq_emp(struct hfi1_pportdata *ppd, u32 rx_preset_index)
+{
+ u32 rx_preset;
+ u8 rx_eq, *cache = ppd->qsfp_info.cache;
+
+ if (!(cache[QSFP_EQ_INFO_OFFS] & 0x2))
+ return;
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+ rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
+ &rx_preset, 4);
+
+ if (!rx_preset) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: RX_EMP_APPLY is set to disabled\n",
+ __func__);
+ return;
+ }
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+ rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP,
+ &rx_preset, 4);
+
+ if ((cache[(128 * 3) + 224] & 0xF) < rx_preset) {
+ dd_dev_info(
+ ppd->dd,
+ "%s: Requested RX EMP %x\n",
+ __func__, rx_preset);
+
+ dd_dev_info(
+ ppd->dd,
+ "%s: Applying supported EMP %x\n",
+ __func__, cache[608] & 0xF);
+
+ rx_preset = cache[608] & 0xF;
+ }
+
+ rx_eq = rx_preset | (rx_preset << 4);
+
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 236, &rx_eq, 1);
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 237, &rx_eq, 1);
+}
+
+static void apply_eq_settings(struct hfi1_pportdata *ppd,
+ u32 rx_preset_index, u32 tx_preset_index)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+
+ /* no point going on w/o a page 3 */
+ if (cache[2] & 4) {
+ dd_dev_info(ppd->dd,
+ "%s: Upper page 03 not present\n",
+ __func__);
+ return;
+ }
+
+ apply_tx_eq_auto(ppd);
+
+ apply_tx_eq_prog(ppd, tx_preset_index);
+
+ apply_rx_eq_emp(ppd, rx_preset_index);
+}
+
+static void apply_rx_amplitude_settings(
+ struct hfi1_pportdata *ppd, u32 rx_preset_index,
+ u32 tx_preset_index)
+{
+ u32 rx_preset;
+ u8 rx_amp = 0, i = 0, preferred = 0, *cache = ppd->qsfp_info.cache;
+
+ /* no point going on w/o a page 3 */
+ if (cache[2] & 4) {
+ dd_dev_info(ppd->dd,
+ "%s: Upper page 03 not present\n",
+ __func__);
+ return;
+ }
+ if (!(cache[QSFP_EQ_INFO_OFFS] & 0x1)) {
+ dd_dev_info(ppd->dd,
+ "%s: RX_AMP_APPLY is set to disabled\n",
+ __func__);
+ return;
+ }
+
+ get_platform_config_field(ppd->dd,
+ PLATFORM_CONFIG_RX_PRESET_TABLE,
+ rx_preset_index,
+ RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
+ &rx_preset, 4);
+
+ if (!rx_preset) {
+ dd_dev_info(ppd->dd,
+ "%s: RX_AMP_APPLY is set to disabled\n",
+ __func__);
+ return;
+ }
+ get_platform_config_field(ppd->dd,
+ PLATFORM_CONFIG_RX_PRESET_TABLE,
+ rx_preset_index,
+ RX_PRESET_TABLE_QSFP_RX_AMP,
+ &rx_preset, 4);
+
+ dd_dev_info(ppd->dd,
+ "%s: Requested RX AMP %x\n",
+ __func__,
+ rx_preset);
+
+ for (i = 0; i < 4; i++) {
+ if (cache[(128 * 3) + 225] & (1 << i)) {
+ preferred = i;
+ if (preferred == rx_preset)
+ break;
+ }
+ }
+
+ /*
+ * Verify that preferred RX amplitude is not just a
+ * fall through of the default
+ */
+ if (!preferred && !(cache[(128 * 3) + 225] & 0x1)) {
+ dd_dev_info(ppd->dd, "No supported RX AMP, not applying\n");
+ return;
+ }
+
+ dd_dev_info(ppd->dd,
+ "%s: Applying RX AMP %x\n", __func__, preferred);
+
+ rx_amp = preferred | (preferred << 4);
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 238, &rx_amp, 1);
+ qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 239, &rx_amp, 1);
+}
+
+#define OPA_INVALID_INDEX 0xFFF
+
+static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
+ u32 config_data, const char *message)
+{
+ u8 i;
+ int ret = HCMD_SUCCESS;
+
+ for (i = 0; i < 4; i++) {
+ ret = load_8051_config(ppd->dd, field_id, i, config_data);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(
+ ppd->dd,
+ "%s: %s for lane %u failed\n",
+ message, __func__, i);
+ }
+ }
+}
+
+static void apply_tunings(
+ struct hfi1_pportdata *ppd, u32 tx_preset_index,
+ u8 tuning_method, u32 total_atten, u8 limiting_active)
+{
+ int ret = 0;
+ u32 config_data = 0, tx_preset = 0;
+ u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0;
+ u8 *cache = ppd->qsfp_info.cache;
+
+ /* Enable external device config if channel is limiting active */
+ read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+ GENERAL_CONFIG, &config_data);
+ config_data |= limiting_active;
+ ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+ GENERAL_CONFIG, config_data);
+ if (ret != HCMD_SUCCESS)
+ dd_dev_err(
+ ppd->dd,
+ "%s: Failed to set enable external device config\n",
+ __func__);
+
+ config_data = 0; /* re-init */
+ /* Pass tuning method to 8051 */
+ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+ &config_data);
+ config_data |= tuning_method;
+ ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+ config_data);
+ if (ret != HCMD_SUCCESS)
+ dd_dev_err(ppd->dd, "%s: Failed to set tuning method\n",
+ __func__);
+
+ /* Set same channel loss for both TX and RX */
+ config_data = 0 | (total_atten << 16) | (total_atten << 24);
+ apply_tx_lanes(ppd, CHANNEL_LOSS_SETTINGS, config_data,
+ "Setting channel loss");
+
+ /* Inform 8051 of cable capabilities */
+ if (ppd->qsfp_info.cache_valid) {
+ external_device_config =
+ ((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) |
+ ((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) |
+ ((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) |
+ (cache[QSFP_EQ_INFO_OFFS] & 0x4);
+ ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+ GENERAL_CONFIG, &config_data);
+ /* Clear, then set the external device config field */
+ config_data &= ~(0xFF << 24);
+ config_data |= (external_device_config << 24);
+ ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+ GENERAL_CONFIG, config_data);
+ if (ret != HCMD_SUCCESS)
+ dd_dev_info(ppd->dd,
+ "%s: Failed set ext device config params\n",
+ __func__);
+ }
+
+ if (tx_preset_index == OPA_INVALID_INDEX) {
+ if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
+ dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
+ __func__);
+ return;
+ }
+
+ /* Following for limiting active channels only */
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+ TX_PRESET_TABLE_PRECUR, &tx_preset, 4);
+ precur = tx_preset;
+
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+ tx_preset_index, TX_PRESET_TABLE_ATTN, &tx_preset, 4);
+ attn = tx_preset;
+
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+ tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
+ postcur = tx_preset;
+
+ config_data = precur | (attn << 8) | (postcur << 16);
+
+ apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
+ "Applying TX settings");
+}
+
+static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
+ u32 *ptr_rx_preset, u32 *ptr_total_atten)
+{
+ int ret;
+ u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+ u8 *cache = ppd->qsfp_info.cache;
+
+ ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+ if (ret) {
+ dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+ __func__, (int)ppd->dd->hfi1_id);
+ return ret;
+ }
+
+ ppd->qsfp_info.limiting_active = 1;
+
+ ret = set_qsfp_tx(ppd, 0);
+ if (ret)
+ goto bail_unlock;
+
+ ret = qual_power(ppd);
+ if (ret)
+ goto bail_unlock;
+
+ ret = qual_bitrate(ppd);
+ if (ret)
+ goto bail_unlock;
+
+ if (ppd->qsfp_info.reset_needed) {
+ reset_qsfp(ppd);
+ ppd->qsfp_info.reset_needed = 0;
+ refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+ } else {
+ ppd->qsfp_info.reset_needed = 1;
+ }
+
+ ret = set_qsfp_high_power(ppd);
+ if (ret)
+ goto bail_unlock;
+
+ if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
+ ret = get_platform_config_field(
+ ppd->dd,
+ PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
+ ptr_tx_preset, 4);
+ if (ret) {
+ *ptr_tx_preset = OPA_INVALID_INDEX;
+ goto bail_unlock;
+ }
+ } else {
+ ret = get_platform_config_field(
+ ppd->dd,
+ PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
+ ptr_tx_preset, 4);
+ if (ret) {
+ *ptr_tx_preset = OPA_INVALID_INDEX;
+ goto bail_unlock;
+ }
+ }
+
+ ret = get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
+ if (ret) {
+ *ptr_rx_preset = OPA_INVALID_INDEX;
+ goto bail_unlock;
+ }
+
+ if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_LOCAL_ATTEN_25G, ptr_total_atten, 4);
+ else if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G))
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_LOCAL_ATTEN_12G, ptr_total_atten, 4);
+
+ apply_cdr_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+ apply_eq_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+ apply_rx_amplitude_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+ ret = set_qsfp_tx(ppd, 1);
+
+bail_unlock:
+ release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+ return ret;
+}
+
+static int tune_qsfp(struct hfi1_pportdata *ppd,
+ u32 *ptr_tx_preset, u32 *ptr_rx_preset,
+ u8 *ptr_tuning_method, u32 *ptr_total_atten)
+{
+ u32 cable_atten = 0, remote_atten = 0, platform_atten = 0;
+ u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+ int ret = 0;
+ u8 *cache = ppd->qsfp_info.cache;
+
+ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+ case 0xA ... 0xB:
+ ret = get_platform_config_field(
+ ppd->dd,
+ PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_LOCAL_ATTEN_25G,
+ &platform_atten, 4);
+ if (ret)
+ return ret;
+
+ if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+ cable_atten = cache[QSFP_CU_ATTEN_12G_OFFS];
+ else if ((lss & OPA_LINK_SPEED_12_5G) &&
+ (lse & OPA_LINK_SPEED_12_5G))
+ cable_atten = cache[QSFP_CU_ATTEN_7G_OFFS];
+
+ /* Fallback to configured attenuation if cable memory is bad */
+ if (cable_atten == 0 || cable_atten > 36) {
+ ret = get_platform_config_field(
+ ppd->dd,
+ PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+ SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G,
+ &cable_atten, 4);
+ if (ret)
+ return ret;
+ }
+
+ ret = get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+ if (ret)
+ return ret;
+
+ *ptr_total_atten = platform_atten + cable_atten + remote_atten;
+
+ *ptr_tuning_method = OPA_PASSIVE_TUNING;
+ break;
+ case 0x0 ... 0x9: /* fallthrough */
+ case 0xC: /* fallthrough */
+ case 0xE:
+ ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
+ ptr_total_atten);
+ if (ret)
+ return ret;
+
+ *ptr_tuning_method = OPA_ACTIVE_TUNING;
+ break;
+ case 0xD: /* fallthrough */
+ case 0xF:
+ default:
+ dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+ __func__);
+ break;
+ }
+ return ret;
+}
+
+/*
+ * This function communicates its success or failure via ppd->driver_link_ready
+ * Thus, it depends on its association with start_link(...) which checks
+ * driver_link_ready before proceeding with the link negotiation and
+ * initialization process.
+ */
+void tune_serdes(struct hfi1_pportdata *ppd)
+{
+ int ret = 0;
+ u32 total_atten = 0;
+ u32 remote_atten = 0, platform_atten = 0;
+ u32 rx_preset_index, tx_preset_index;
+ u8 tuning_method = 0, limiting_active = 0;
+ struct hfi1_devdata *dd = ppd->dd;
+
+ rx_preset_index = OPA_INVALID_INDEX;
+ tx_preset_index = OPA_INVALID_INDEX;
+
+ /* the link defaults to enabled */
+ ppd->link_enabled = 1;
+ /* the driver link ready state defaults to not ready */
+ ppd->driver_link_ready = 0;
+ ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
+
+ /* Skip the tuning for testing (loopback != none) and simulations */
+ if (loopback != LOOPBACK_NONE ||
+ ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+ ppd->driver_link_ready = 1;
+ return;
+ }
+
+ ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_PORT_TYPE, &ppd->port_type,
+ 4);
+ if (ret)
+ ppd->port_type = PORT_TYPE_UNKNOWN;
+
+ switch (ppd->port_type) {
+ case PORT_TYPE_DISCONNECTED:
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
+ dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+ __func__);
+ goto bail;
+ case PORT_TYPE_FIXED:
+ /* platform_atten, remote_atten pre-zeroed to catch error */
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_LOCAL_ATTEN_25G, &platform_atten, 4);
+
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+
+ total_atten = platform_atten + remote_atten;
+
+ tuning_method = OPA_PASSIVE_TUNING;
+ break;
+ case PORT_TYPE_VARIABLE:
+ if (qsfp_mod_present(ppd)) {
+ /*
+ * platform_atten, remote_atten pre-zeroed to
+ * catch error
+ */
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_LOCAL_ATTEN_25G,
+ &platform_atten, 4);
+
+ get_platform_config_field(
+ ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_REMOTE_ATTEN_25G,
+ &remote_atten, 4);
+
+ total_atten = platform_atten + remote_atten;
+
+ tuning_method = OPA_PASSIVE_TUNING;
+ } else
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
+ break;
+ case PORT_TYPE_QSFP:
+ if (qsfp_mod_present(ppd)) {
+ refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+
+ if (ppd->qsfp_info.cache_valid) {
+ ret = tune_qsfp(ppd,
+ &tx_preset_index,
+ &rx_preset_index,
+ &tuning_method,
+ &total_atten);
+
+ /*
+ * We may have modified the QSFP memory, so
+ * update the cache to reflect the changes
+ */
+ refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+ if (ret)
+ goto bail;
+
+ limiting_active =
+ ppd->qsfp_info.limiting_active;
+ } else {
+ dd_dev_err(dd,
+ "%s: Reading QSFP memory failed\n",
+ __func__);
+ goto bail;
+ }
+ } else
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(
+ OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+ break;
+ default:
+ dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+ ppd->port_type = PORT_TYPE_UNKNOWN;
+ tuning_method = OPA_UNKNOWN_TUNING;
+ total_atten = 0;
+ limiting_active = 0;
+ tx_preset_index = OPA_INVALID_INDEX;
+ break;
+ }
+
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+ apply_tunings(ppd, tx_preset_index, tuning_method,
+ total_atten, limiting_active);
+
+ if (!ret)
+ ppd->driver_link_ready = 1;
+
+ return;
+bail:
+ ppd->driver_link_ready = 0;
+}
diff --git a/drivers/staging/rdma/hfi1/platform_config.h b/drivers/staging/rdma/hfi1/platform.h
index 8a94a8342052..19620cf546d5 100644
--- a/drivers/staging/rdma/hfi1/platform_config.h
+++ b/drivers/staging/rdma/hfi1/platform.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -47,8 +44,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
-#ifndef __PLATFORM_CONFIG_H
-#define __PLATFORM_CONFIG_H
+#ifndef __PLATFORM_H
+#define __PLATFORM_H
#define METADATA_TABLE_FIELD_START_SHIFT 0
#define METADATA_TABLE_FIELD_START_LEN_BITS 15
@@ -94,17 +91,18 @@ enum platform_config_system_table_fields {
enum platform_config_port_table_fields {
PORT_TABLE_RESERVED,
PORT_TABLE_PORT_TYPE,
- PORT_TABLE_ATTENUATION_12G,
- PORT_TABLE_ATTENUATION_25G,
+ PORT_TABLE_LOCAL_ATTEN_12G,
+ PORT_TABLE_LOCAL_ATTEN_25G,
PORT_TABLE_LINK_SPEED_SUPPORTED,
PORT_TABLE_LINK_WIDTH_SUPPORTED,
+ PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
+ PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
PORT_TABLE_VL_CAP,
PORT_TABLE_MTU_CAP,
PORT_TABLE_TX_LANE_ENABLE_MASK,
PORT_TABLE_LOCAL_MAX_TIMEOUT,
- PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
- PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
- PORT_TABLE_TX_PRESET_IDX_PASSIVE_CU,
+ PORT_TABLE_REMOTE_ATTEN_12G,
+ PORT_TABLE_REMOTE_ATTEN_25G,
PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
PORT_TABLE_RX_PRESET_IDX,
@@ -115,10 +113,10 @@ enum platform_config_port_table_fields {
enum platform_config_rx_preset_table_fields {
RX_PRESET_TABLE_RESERVED,
RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
- RX_PRESET_TABLE_QSFP_RX_EQ_APPLY,
+ RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
RX_PRESET_TABLE_QSFP_RX_CDR,
- RX_PRESET_TABLE_QSFP_RX_EQ,
+ RX_PRESET_TABLE_QSFP_RX_EMP,
RX_PRESET_TABLE_QSFP_RX_AMP,
RX_PRESET_TABLE_MAX
};
@@ -149,6 +147,11 @@ enum platform_config_variable_settings_table_fields {
VARIABLE_SETTINGS_TABLE_MAX
};
+struct platform_config {
+ size_t size;
+ const u8 *data;
+};
+
struct platform_config_data {
u32 *table;
u32 *table_metadata;
@@ -179,9 +182,11 @@ static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
* fields defined for each table above
*/
-/*=====================================================
+/*
+ * =====================================================
* System table encodings
- *====================================================*/
+ * =====================================================
+ */
#define PLATFORM_CONFIG_MAGIC_NUM 0x3d4f5041
#define PLATFORM_CONFIG_MAGIC_NUMBER_LEN 4
@@ -199,12 +204,13 @@ enum platform_config_qsfp_power_class_encoding {
QSFP_POWER_CLASS_7
};
-
-/*=====================================================
+/*
+ * ====================================================
* Port table encodings
- *==================================================== */
+ * ====================================================
+ */
enum platform_config_port_type_encoding {
- PORT_TYPE_RESERVED,
+ PORT_TYPE_UNKNOWN,
PORT_TYPE_DISCONNECTED,
PORT_TYPE_FIXED,
PORT_TYPE_VARIABLE,
@@ -283,4 +289,16 @@ enum platform_config_local_max_timeout_encoding {
LOCAL_MAX_TIMEOUT_1000_S
};
-#endif /*__PLATFORM_CONFIG_H*/
+enum link_tuning_encoding {
+ OPA_PASSIVE_TUNING,
+ OPA_ACTIVE_TUNING,
+ OPA_UNKNOWN_TUNING
+};
+
+/* platform.c */
+void get_platform_config(struct hfi1_devdata *dd);
+void free_platform_config(struct hfi1_devdata *dd);
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
+void tune_serdes(struct hfi1_pportdata *ppd);
+
+#endif /*__PLATFORM_H*/
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index ce036810d576..29a5ad28019b 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -54,31 +51,32 @@
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
#include "hfi.h"
#include "qp.h"
#include "trace.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
-#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
-
-static unsigned int hfi1_qp_table_size = 256;
+unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");
-static void flush_tx_list(struct hfi1_qp *qp);
+static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
unsigned seq);
static void iowait_wakeup(struct iowait *wait, int reason);
+static void iowait_sdma_drained(struct iowait *wait);
+static void qp_pio_drain(struct rvt_qp *qp);
-static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt,
- struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+ struct rvt_qpn_map *map, unsigned off)
{
- return (map - qpt->map) * BITS_PER_PAGE + off;
+ return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}
/*
@@ -118,437 +116,15 @@ static const u16 credit_table[31] = {
32768 /* 1E */
};
-static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map)
-{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
-
- /*
- * Free the page if someone raced with us installing it.
- */
-
- spin_lock(&qpt->lock);
- if (map->page)
- free_page(page);
- else
- map->page = (void *)page;
- spin_unlock(&qpt->lock);
-}
-
-/*
- * Allocate the next available QPN or
- * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
- */
-static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt,
- enum ib_qp_type type, u8 port)
-{
- u32 i, offset, max_scan, qpn;
- struct qpn_map *map;
- u32 ret;
-
- if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
- unsigned n;
-
- ret = type == IB_QPT_GSI;
- n = 1 << (ret + 2 * (port - 1));
- spin_lock(&qpt->lock);
- if (qpt->flags & n)
- ret = -EINVAL;
- else
- qpt->flags |= n;
- spin_unlock(&qpt->lock);
- goto bail;
- }
-
- qpn = qpt->last + qpt->incr;
- if (qpn >= QPN_MAX)
- qpn = qpt->incr | ((qpt->last & 1) ^ 1);
- /* offset carries bit 0 */
- offset = qpn & BITS_PER_PAGE_MASK;
- map = &qpt->map[qpn / BITS_PER_PAGE];
- max_scan = qpt->nmaps - !offset;
- for (i = 0;;) {
- if (unlikely(!map->page)) {
- get_map_page(qpt, map);
- if (unlikely(!map->page))
- break;
- }
- do {
- if (!test_and_set_bit(offset, map->page)) {
- qpt->last = qpn;
- ret = qpn;
- goto bail;
- }
- offset += qpt->incr;
- /*
- * This qpn might be bogus if offset >= BITS_PER_PAGE.
- * That is OK. It gets re-assigned below
- */
- qpn = mk_qpn(qpt, map, offset);
- } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
- /*
- * In order to keep the number of pages allocated to a
- * minimum, we scan the all existing pages before increasing
- * the size of the bitmap table.
- */
- if (++i > max_scan) {
- if (qpt->nmaps == QPNMAP_ENTRIES)
- break;
- map = &qpt->map[qpt->nmaps++];
- /* start at incr with current bit 0 */
- offset = qpt->incr | (offset & 1);
- } else if (map < &qpt->map[qpt->nmaps]) {
- ++map;
- /* start at incr with current bit 0 */
- offset = qpt->incr | (offset & 1);
- } else {
- map = &qpt->map[0];
- /* wrap to first map page, invert bit 0 */
- offset = qpt->incr | ((offset & 1) ^ 1);
- }
- /* there can be no bits at shift and below */
- WARN_ON(offset & (dd->qos_shift - 1));
- qpn = mk_qpn(qpt, map, offset);
- }
-
- ret = -ENOMEM;
-
-bail:
- return ret;
-}
-
-static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn)
-{
- struct qpn_map *map;
-
- map = qpt->map + qpn / BITS_PER_PAGE;
- if (map->page)
- clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- unsigned long flags;
-
- atomic_inc(&qp->refcount);
- spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
- if (qp->ibqp.qp_num <= 1) {
- rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp);
- } else {
- u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-
- qp->next = dev->qp_dev->qp_table[n];
- rcu_assign_pointer(dev->qp_dev->qp_table[n], qp);
- trace_hfi1_qpinsert(qp, n);
- }
-
- spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
- unsigned long flags;
- int removed = 1;
-
- spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
- if (rcu_dereference_protected(ibp->qp[0],
- lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
- RCU_INIT_POINTER(ibp->qp[0], NULL);
- } else if (rcu_dereference_protected(ibp->qp[1],
- lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
- RCU_INIT_POINTER(ibp->qp[1], NULL);
- } else {
- struct hfi1_qp *q;
- struct hfi1_qp __rcu **qpp;
-
- removed = 0;
- qpp = &dev->qp_dev->qp_table[n];
- for (; (q = rcu_dereference_protected(*qpp,
- lockdep_is_held(&dev->qp_dev->qpt_lock)))
- != NULL;
- qpp = &q->next)
- if (q == qp) {
- RCU_INIT_POINTER(*qpp,
- rcu_dereference_protected(qp->next,
- lockdep_is_held(&dev->qp_dev->qpt_lock)));
- removed = 1;
- trace_hfi1_qpremove(qp, n);
- break;
- }
- }
-
- spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
- if (removed) {
- synchronize_rcu();
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
- }
-}
-
-/**
- * free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-static unsigned free_all_qps(struct hfi1_devdata *dd)
-{
- struct hfi1_ibdev *dev = &dd->verbs_dev;
- unsigned long flags;
- struct hfi1_qp *qp;
- unsigned n, qp_inuse = 0;
-
- for (n = 0; n < dd->num_pports; n++) {
- struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
-
- if (!hfi1_mcast_tree_empty(ibp))
- qp_inuse++;
- rcu_read_lock();
- if (rcu_dereference(ibp->qp[0]))
- qp_inuse++;
- if (rcu_dereference(ibp->qp[1]))
- qp_inuse++;
- rcu_read_unlock();
- }
-
- if (!dev->qp_dev)
- goto bail;
- spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
- for (n = 0; n < dev->qp_dev->qp_table_size; n++) {
- qp = rcu_dereference_protected(dev->qp_dev->qp_table[n],
- lockdep_is_held(&dev->qp_dev->qpt_lock));
- RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL);
-
- for (; qp; qp = rcu_dereference_protected(qp->next,
- lockdep_is_held(&dev->qp_dev->qpt_lock)))
- qp_inuse++;
- }
- spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
- synchronize_rcu();
-bail:
- return qp_inuse;
-}
-
-/**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type)
-{
- qp->remote_qpn = 0;
- qp->qkey = 0;
- qp->qp_access_flags = 0;
- iowait_init(
- &qp->s_iowait,
- 1,
- hfi1_do_send,
- iowait_sleep,
- iowait_wakeup);
- qp->s_flags &= HFI1_S_SIGNAL_REQ_WR;
- qp->s_hdrwords = 0;
- qp->s_wqe = NULL;
- qp->s_draining = 0;
- qp->s_next_psn = 0;
- qp->s_last_psn = 0;
- qp->s_sending_psn = 0;
- qp->s_sending_hpsn = 0;
- qp->s_psn = 0;
- qp->r_psn = 0;
- qp->r_msn = 0;
- if (type == IB_QPT_RC) {
- qp->s_state = IB_OPCODE_RC_SEND_LAST;
- qp->r_state = IB_OPCODE_RC_SEND_LAST;
- } else {
- qp->s_state = IB_OPCODE_UC_SEND_LAST;
- qp->r_state = IB_OPCODE_UC_SEND_LAST;
- }
- qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- qp->r_nak_state = 0;
- qp->r_adefered = 0;
- qp->r_aflags = 0;
- qp->r_flags = 0;
- qp->s_head = 0;
- qp->s_tail = 0;
- qp->s_cur = 0;
- qp->s_acked = 0;
- qp->s_last = 0;
- qp->s_ssn = 1;
- qp->s_lsn = 0;
- clear_ahg(qp);
- qp->s_mig_state = IB_MIG_MIGRATED;
- memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
- qp->r_head_ack_queue = 0;
- qp->s_tail_ack_queue = 0;
- qp->s_num_rd_atomic = 0;
- if (qp->r_rq.wq) {
- qp->r_rq.wq->head = 0;
- qp->r_rq.wq->tail = 0;
- }
- qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
-{
- unsigned n;
-
- if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
- hfi1_put_ss(&qp->s_rdma_read_sge);
-
- hfi1_put_ss(&qp->r_sge);
-
- if (clr_sends) {
- while (qp->s_last != qp->s_head) {
- struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
- unsigned i;
-
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct hfi1_sge *sge = &wqe->sg_list[i];
-
- hfi1_put_mr(sge->mr);
- }
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- }
- if (qp->s_rdma_mr) {
- hfi1_put_mr(qp->s_rdma_mr);
- qp->s_rdma_mr = NULL;
- }
- }
-
- if (qp->ibqp.qp_type != IB_QPT_RC)
- return;
-
- for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
- struct hfi1_ack_entry *e = &qp->s_ack_queue[n];
-
- if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
- e->rdma_sge.mr) {
- hfi1_put_mr(e->rdma_sge.mr);
- e->rdma_sge.mr = NULL;
- }
- }
-}
-
-/**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err)
+static void flush_tx_list(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
- struct ib_wc wc;
- int ret = 0;
-
- if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
- goto bail;
-
- qp->state = IB_QPS_ERR;
-
- if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
- qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
-
- if (qp->s_flags & HFI1_S_ANY_WAIT_SEND)
- qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND;
-
- write_seqlock(&dev->iowait_lock);
- if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) {
- qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
- list_del_init(&qp->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
- }
- write_sequnlock(&dev->iowait_lock);
-
- if (!(qp->s_flags & HFI1_S_BUSY)) {
- qp->s_hdrwords = 0;
- if (qp->s_rdma_mr) {
- hfi1_put_mr(qp->s_rdma_mr);
- qp->s_rdma_mr = NULL;
- }
- flush_tx_list(qp);
- }
-
- /* Schedule the sending tasklet to drain the send work queue. */
- if (qp->s_last != qp->s_head)
- hfi1_schedule_send(qp);
-
- clear_mr_refs(qp, 0);
-
- memset(&wc, 0, sizeof(wc));
- wc.qp = &qp->ibqp;
- wc.opcode = IB_WC_RECV;
-
- if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) {
- wc.wr_id = qp->r_wr_id;
- wc.status = err;
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
- }
- wc.status = IB_WC_WR_FLUSH_ERR;
+ struct hfi1_qp_priv *priv = qp->priv;
- if (qp->r_rq.wq) {
- struct hfi1_rwq *wq;
- u32 head;
- u32 tail;
-
- spin_lock(&qp->r_rq.lock);
-
- /* sanity check pointers before trusting them */
- wq = qp->r_rq.wq;
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- while (tail != head) {
- wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
- if (++tail >= qp->r_rq.size)
- tail = 0;
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
- }
- wq->tail = tail;
-
- spin_unlock(&qp->r_rq.lock);
- } else if (qp->ibqp.event_handler)
- ret = 1;
-
-bail:
- return ret;
-}
-
-static void flush_tx_list(struct hfi1_qp *qp)
-{
- while (!list_empty(&qp->s_iowait.tx_head)) {
+ while (!list_empty(&priv->s_iowait.tx_head)) {
struct sdma_txreq *tx;
tx = list_first_entry(
- &qp->s_iowait.tx_head,
+ &priv->s_iowait.tx_head,
struct sdma_txreq,
list);
list_del_init(&tx->list);
@@ -557,14 +133,15 @@ static void flush_tx_list(struct hfi1_qp *qp)
}
}
-static void flush_iowait(struct hfi1_qp *qp)
+static void flush_iowait(struct rvt_qp *qp)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
write_seqlock_irqsave(&dev->iowait_lock, flags);
- if (!list_empty(&qp->s_iowait.list)) {
- list_del_init(&qp->s_iowait.list);
+ if (!list_empty(&priv->s_iowait.list)) {
+ list_del_init(&priv->s_iowait.list);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
@@ -597,362 +174,106 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
return ib_mtu_enum_to_int(mtu);
}
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
+ struct ib_qp *ibqp = &qp->ibqp;
struct hfi1_ibdev *dev = to_idev(ibqp->device);
- struct hfi1_qp *qp = to_iqp(ibqp);
- enum ib_qp_state cur_state, new_state;
- struct ib_event ev;
- int lastwqe = 0;
- int mig = 0;
- int ret;
- u32 pmtu = 0; /* for gcc warning only */
struct hfi1_devdata *dd = dd_from_dev(dev);
-
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_lock);
-
- cur_state = attr_mask & IB_QP_CUR_STATE ?
- attr->cur_qp_state : qp->state;
- new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, IB_LINK_LAYER_UNSPECIFIED))
- goto inval;
+ u8 sc;
if (attr_mask & IB_QP_AV) {
- u8 sc;
-
- if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
- goto inval;
- if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
- goto inval;
sc = ah_to_sc(ibqp->device, &attr->ah_attr);
+ if (sc == 0xf)
+ return -EINVAL;
+
if (!qp_to_sdma_engine(qp, sc) &&
dd->flags & HFI1_HAS_SEND_DMA)
- goto inval;
+ return -EINVAL;
+
+ if (!qp_to_send_context(qp, sc))
+ return -EINVAL;
}
if (attr_mask & IB_QP_ALT_PATH) {
- u8 sc;
-
- if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
- goto inval;
- if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
- goto inval;
- if (attr->alt_pkey_index >= hfi1_get_npkeys(dd))
- goto inval;
sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
+ if (sc == 0xf)
+ return -EINVAL;
+
if (!qp_to_sdma_engine(qp, sc) &&
dd->flags & HFI1_HAS_SEND_DMA)
- goto inval;
- }
-
- if (attr_mask & IB_QP_PKEY_INDEX)
- if (attr->pkey_index >= hfi1_get_npkeys(dd))
- goto inval;
-
- if (attr_mask & IB_QP_MIN_RNR_TIMER)
- if (attr->min_rnr_timer > 31)
- goto inval;
+ return -EINVAL;
- if (attr_mask & IB_QP_PORT)
- if (qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI ||
- attr->port_num == 0 ||
- attr->port_num > ibqp->device->phys_port_cnt)
- goto inval;
-
- if (attr_mask & IB_QP_DEST_QPN)
- if (attr->dest_qp_num > HFI1_QPN_MASK)
- goto inval;
+ if (!qp_to_send_context(qp, sc))
+ return -EINVAL;
+ }
- if (attr_mask & IB_QP_RETRY_CNT)
- if (attr->retry_cnt > 7)
- goto inval;
+ return 0;
+}
- if (attr_mask & IB_QP_RNR_RETRY)
- if (attr->rnr_retry > 7)
- goto inval;
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct hfi1_qp_priv *priv = qp->priv;
- /*
- * Don't allow invalid path_mtu values. OK to set greater
- * than the active mtu (or even the max_cap, if we have tuned
- * that to a small mtu. We'll set qp->path_mtu
- * to the lesser of requested attribute mtu and active,
- * for packetizing messages.
- * Note that the QP port has to be set in INIT and MTU in RTR.
- */
- if (attr_mask & IB_QP_PATH_MTU) {
- int mtu, pidx = qp->port_num - 1;
-
- dd = dd_from_dev(dev);
- mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
- if (mtu == -1)
- goto inval;
-
- if (mtu > dd->pport[pidx].ibmtu)
- pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
- else
- pmtu = attr->path_mtu;
+ if (attr_mask & IB_QP_AV) {
+ priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+ priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+ priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
}
- if (attr_mask & IB_QP_PATH_MIG_STATE) {
- if (attr->path_mig_state == IB_MIG_REARM) {
- if (qp->s_mig_state == IB_MIG_ARMED)
- goto inval;
- if (new_state != IB_QPS_RTS)
- goto inval;
- } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
- if (qp->s_mig_state == IB_MIG_REARM)
- goto inval;
- if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
- goto inval;
- if (qp->s_mig_state == IB_MIG_ARMED)
- mig = 1;
- } else
- goto inval;
+ if (attr_mask & IB_QP_PATH_MIG_STATE &&
+ attr->path_mig_state == IB_MIG_MIGRATED &&
+ qp->s_mig_state == IB_MIG_ARMED) {
+ qp->s_flags |= RVT_S_AHG_CLEAR;
+ priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+ priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+ priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
}
+}
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
- goto inval;
-
- switch (new_state) {
- case IB_QPS_RESET:
- if (qp->state != IB_QPS_RESET) {
- qp->state = IB_QPS_RESET;
- flush_iowait(qp);
- qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
- /* Stop the sending work queue and retry timer */
- cancel_work_sync(&qp->s_iowait.iowork);
- del_timer_sync(&qp->s_timer);
- iowait_sdma_drain(&qp->s_iowait);
- flush_tx_list(qp);
- remove_qp(dev, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_lock);
- clear_mr_refs(qp, 1);
- clear_ahg(qp);
- reset_qp(qp, ibqp->qp_type);
- }
- break;
-
- case IB_QPS_RTR:
- /* Allow event to re-trigger if QP set to RTR more than once */
- qp->r_flags &= ~HFI1_R_COMM_EST;
- qp->state = new_state;
- break;
-
- case IB_QPS_SQD:
- qp->s_draining = qp->s_last != qp->s_cur;
- qp->state = new_state;
- break;
+/**
+ * hfi1_check_send_wqe - validate wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wqe. This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ *
+ */
+int hfi1_check_send_wqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct rvt_ah *ah;
- case IB_QPS_SQE:
- if (qp->ibqp.qp_type == IB_QPT_RC)
- goto inval;
- qp->state = new_state;
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ if (wqe->length > 0x80000000U)
+ return -EINVAL;
break;
-
- case IB_QPS_ERR:
- lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ case IB_QPT_SMI:
+ ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ if (wqe->length > (1 << ah->log_pmtu))
+ return -EINVAL;
break;
-
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ if (wqe->length > (1 << ah->log_pmtu))
+ return -EINVAL;
+ if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+ return -EINVAL;
default:
- qp->state = new_state;
break;
}
-
- if (attr_mask & IB_QP_PKEY_INDEX)
- qp->s_pkey_index = attr->pkey_index;
-
- if (attr_mask & IB_QP_PORT)
- qp->port_num = attr->port_num;
-
- if (attr_mask & IB_QP_DEST_QPN)
- qp->remote_qpn = attr->dest_qp_num;
-
- if (attr_mask & IB_QP_SQ_PSN) {
- qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
- qp->s_psn = qp->s_next_psn;
- qp->s_sending_psn = qp->s_next_psn;
- qp->s_last_psn = qp->s_next_psn - 1;
- qp->s_sending_hpsn = qp->s_last_psn;
- }
-
- if (attr_mask & IB_QP_RQ_PSN)
- qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;
-
- if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->qp_access_flags = attr->qp_access_flags;
-
- if (attr_mask & IB_QP_AV) {
- qp->remote_ah_attr = attr->ah_attr;
- qp->s_srate = attr->ah_attr.static_rate;
- qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
- qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
- qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
- }
-
- if (attr_mask & IB_QP_ALT_PATH) {
- qp->alt_ah_attr = attr->alt_ah_attr;
- qp->s_alt_pkey_index = attr->alt_pkey_index;
- }
-
- if (attr_mask & IB_QP_PATH_MIG_STATE) {
- qp->s_mig_state = attr->path_mig_state;
- if (mig) {
- qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
- qp->s_pkey_index = qp->s_alt_pkey_index;
- qp->s_flags |= HFI1_S_AHG_CLEAR;
- qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
- qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
- }
- }
-
- if (attr_mask & IB_QP_PATH_MTU) {
- struct hfi1_ibport *ibp;
- u8 sc, vl;
- u32 mtu;
-
- dd = dd_from_dev(dev);
- ibp = &dd->pport[qp->port_num - 1].ibport_data;
-
- sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
- vl = sc_to_vlt(dd, sc);
-
- mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
- if (vl < PER_VL_SEND_CONTEXTS)
- mtu = min_t(u32, mtu, dd->vld[vl].mtu);
- pmtu = mtu_to_enum(mtu, OPA_MTU_8192);
-
- qp->path_mtu = pmtu;
- qp->pmtu = mtu;
- }
-
- if (attr_mask & IB_QP_RETRY_CNT) {
- qp->s_retry_cnt = attr->retry_cnt;
- qp->s_retry = attr->retry_cnt;
- }
-
- if (attr_mask & IB_QP_RNR_RETRY) {
- qp->s_rnr_retry_cnt = attr->rnr_retry;
- qp->s_rnr_retry = attr->rnr_retry;
- }
-
- if (attr_mask & IB_QP_MIN_RNR_TIMER)
- qp->r_min_rnr_timer = attr->min_rnr_timer;
-
- if (attr_mask & IB_QP_TIMEOUT) {
- qp->timeout = attr->timeout;
- qp->timeout_jiffies =
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
- 1000UL);
- }
-
- if (attr_mask & IB_QP_QKEY)
- qp->qkey = attr->qkey;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
- qp->s_max_rd_atomic = attr->max_rd_atomic;
-
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
-
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- insert_qp(dev, qp);
-
- if (lastwqe) {
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- if (mig) {
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_PATH_MIG;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- ret = 0;
- goto bail;
-
-inval:
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
- ret = -EINVAL;
-
-bail:
- return ret;
-}
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_qp_init_attr *init_attr)
-{
- struct hfi1_qp *qp = to_iqp(ibqp);
-
- attr->qp_state = qp->state;
- attr->cur_qp_state = attr->qp_state;
- attr->path_mtu = qp->path_mtu;
- attr->path_mig_state = qp->s_mig_state;
- attr->qkey = qp->qkey;
- attr->rq_psn = mask_psn(qp->r_psn);
- attr->sq_psn = mask_psn(qp->s_next_psn);
- attr->dest_qp_num = qp->remote_qpn;
- attr->qp_access_flags = qp->qp_access_flags;
- attr->cap.max_send_wr = qp->s_size - 1;
- attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
- attr->cap.max_send_sge = qp->s_max_sge;
- attr->cap.max_recv_sge = qp->r_rq.max_sge;
- attr->cap.max_inline_data = 0;
- attr->ah_attr = qp->remote_ah_attr;
- attr->alt_ah_attr = qp->alt_ah_attr;
- attr->pkey_index = qp->s_pkey_index;
- attr->alt_pkey_index = qp->s_alt_pkey_index;
- attr->en_sqd_async_notify = 0;
- attr->sq_draining = qp->s_draining;
- attr->max_rd_atomic = qp->s_max_rd_atomic;
- attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
- attr->min_rnr_timer = qp->r_min_rnr_timer;
- attr->port_num = qp->port_num;
- attr->timeout = qp->timeout;
- attr->retry_cnt = qp->s_retry_cnt;
- attr->rnr_retry = qp->s_rnr_retry_cnt;
- attr->alt_port_num = qp->alt_ah_attr.port_num;
- attr->alt_timeout = qp->alt_timeout;
-
- init_attr->event_handler = qp->ibqp.event_handler;
- init_attr->qp_context = qp->ibqp.qp_context;
- init_attr->send_cq = qp->ibqp.send_cq;
- init_attr->recv_cq = qp->ibqp.recv_cq;
- init_attr->srq = qp->ibqp.srq;
- init_attr->cap = attr->cap;
- if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR)
- init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
- else
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->qp_type = qp->ibqp.qp_type;
- init_attr->port_num = qp->port_num;
- return 0;
+ return wqe->length <= piothreshold;
}
/**
@@ -961,7 +282,7 @@ int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
*
* Returns the AETH.
*/
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp)
+__be32 hfi1_compute_aeth(struct rvt_qp *qp)
{
u32 aeth = qp->r_msn & HFI1_MSN_MASK;
@@ -974,7 +295,7 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
} else {
u32 min, max, x;
u32 credits;
- struct hfi1_rwq *wq = qp->r_rq.wq;
+ struct rvt_rwq *wq = qp->r_rq.wq;
u32 head;
u32 tail;
@@ -1004,12 +325,13 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
x = (min + max) / 2;
if (credit_table[x] == credits)
break;
- if (credit_table[x] > credits)
+ if (credit_table[x] > credits) {
max = x;
- else if (min == x)
- break;
- else
+ } else {
+ if (min == x)
+ break;
min = x;
+ }
}
aeth |= x << HFI1_AETH_CREDIT_SHIFT;
}
@@ -1017,348 +339,58 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
}
/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
-{
- struct hfi1_qp *qp;
- int err;
- struct hfi1_swqe *swq = NULL;
- struct hfi1_ibdev *dev;
- struct hfi1_devdata *dd;
- size_t sz;
- size_t sg_list_sz;
- struct ib_qp *ret;
-
- if (init_attr->cap.max_send_sge > hfi1_max_sges ||
- init_attr->cap.max_send_wr > hfi1_max_qp_wrs ||
- init_attr->create_flags) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
-
- /* Check receive queue parameters if no SRQ is specified. */
- if (!init_attr->srq) {
- if (init_attr->cap.max_recv_sge > hfi1_max_sges ||
- init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- if (init_attr->cap.max_send_sge +
- init_attr->cap.max_send_wr +
- init_attr->cap.max_recv_sge +
- init_attr->cap.max_recv_wr == 0) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- }
-
- switch (init_attr->qp_type) {
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- if (init_attr->port_num == 0 ||
- init_attr->port_num > ibpd->device->phys_port_cnt) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- case IB_QPT_UC:
- case IB_QPT_RC:
- case IB_QPT_UD:
- sz = sizeof(struct hfi1_sge) *
- init_attr->cap.max_send_sge +
- sizeof(struct hfi1_swqe);
- swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
- if (swq == NULL) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
- sz = sizeof(*qp);
- sg_list_sz = 0;
- if (init_attr->srq) {
- struct hfi1_srq *srq = to_isrq(init_attr->srq);
-
- if (srq->rq.max_sge > 1)
- sg_list_sz = sizeof(*qp->r_sg_list) *
- (srq->rq.max_sge - 1);
- } else if (init_attr->cap.max_recv_sge > 1)
- sg_list_sz = sizeof(*qp->r_sg_list) *
- (init_attr->cap.max_recv_sge - 1);
- qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
- if (!qp) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_swq;
- }
- RCU_INIT_POINTER(qp->next, NULL);
- qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
- if (!qp->s_hdr) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
- }
- qp->timeout_jiffies =
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
- 1000UL);
- if (init_attr->srq)
- sz = 0;
- else {
- qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
- qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
- sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
- sizeof(struct hfi1_rwqe);
- qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) +
- qp->r_rq.size * sz);
- if (!qp->r_rq.wq) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
- }
- }
-
- /*
- * ib_create_qp() will initialize qp->ibqp
- * except for qp->ibqp.qp_num.
- */
- spin_lock_init(&qp->r_lock);
- spin_lock_init(&qp->s_lock);
- spin_lock_init(&qp->r_rq.lock);
- atomic_set(&qp->refcount, 0);
- init_waitqueue_head(&qp->wait);
- init_timer(&qp->s_timer);
- qp->s_timer.data = (unsigned long)qp;
- INIT_LIST_HEAD(&qp->rspwait);
- qp->state = IB_QPS_RESET;
- qp->s_wq = swq;
- qp->s_size = init_attr->cap.max_send_wr + 1;
- qp->s_max_sge = init_attr->cap.max_send_sge;
- if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
- qp->s_flags = HFI1_S_SIGNAL_REQ_WR;
- dev = to_idev(ibpd->device);
- dd = dd_from_dev(dev);
- err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type,
- init_attr->port_num);
- if (err < 0) {
- ret = ERR_PTR(err);
- vfree(qp->r_rq.wq);
- goto bail_qp;
- }
- qp->ibqp.qp_num = err;
- qp->port_num = init_attr->port_num;
- reset_qp(qp, init_attr->qp_type);
-
- break;
-
- default:
- /* Don't support raw QPs */
- ret = ERR_PTR(-ENOSYS);
- goto bail;
- }
-
- init_attr->cap.max_inline_data = 0;
-
- /*
- * Return the address of the RWQ as the offset to mmap.
- * See hfi1_mmap() for details.
- */
- if (udata && udata->outlen >= sizeof(__u64)) {
- if (!qp->r_rq.wq) {
- __u64 offset = 0;
-
- err = ib_copy_to_udata(udata, &offset,
- sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_ip;
- }
- } else {
- u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz;
-
- qp->ip = hfi1_create_mmap_info(dev, s,
- ibpd->uobject->context,
- qp->r_rq.wq);
- if (!qp->ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_ip;
- }
-
- err = ib_copy_to_udata(udata, &(qp->ip->offset),
- sizeof(qp->ip->offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_ip;
- }
- }
- }
-
- spin_lock(&dev->n_qps_lock);
- if (dev->n_qps_allocated == hfi1_max_qps) {
- spin_unlock(&dev->n_qps_lock);
- ret = ERR_PTR(-ENOMEM);
- goto bail_ip;
- }
-
- dev->n_qps_allocated++;
- spin_unlock(&dev->n_qps_lock);
-
- if (qp->ip) {
- spin_lock_irq(&dev->pending_lock);
- list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
- spin_unlock_irq(&dev->pending_lock);
- }
-
- ret = &qp->ibqp;
-
- /*
- * We have our QP and its good, now keep track of what types of opcodes
- * can be processed on this QP. We do this by keeping track of what the
- * 3 high order bits of the opcode are.
- */
- switch (init_attr->qp_type) {
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- case IB_QPT_UD:
- qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK;
- break;
- case IB_QPT_RC:
- qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK;
- break;
- case IB_QPT_UC:
- qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK;
- break;
- default:
- ret = ERR_PTR(-EINVAL);
- goto bail_ip;
- }
-
- goto bail;
-
-bail_ip:
- if (qp->ip)
- kref_put(&qp->ip->ref, hfi1_release_mmap_info);
- else
- vfree(qp->r_rq.wq);
- free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
- kfree(qp->s_hdr);
- kfree(qp);
-bail_swq:
- vfree(swq);
-bail:
- return ret;
-}
-
-/**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
+ * _hfi1_schedule_send - schedule progress
+ * @qp: the QP
*
- * Returns 0 on success.
+ * This schedules qp progress w/o regard to the s_flags.
*
- * Note that this can be called while the QP is actively sending or
- * receiving!
+ * It is only used in the post send, which doesn't hold
+ * the s_lock.
*/
-int hfi1_destroy_qp(struct ib_qp *ibqp)
+void _hfi1_schedule_send(struct rvt_qp *qp)
{
- struct hfi1_qp *qp = to_iqp(ibqp);
- struct hfi1_ibdev *dev = to_idev(ibqp->device);
-
- /* Make sure HW and driver activity is stopped. */
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_lock);
- if (qp->state != IB_QPS_RESET) {
- qp->state = IB_QPS_RESET;
- flush_iowait(qp);
- qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
- cancel_work_sync(&qp->s_iowait.iowork);
- del_timer_sync(&qp->s_timer);
- iowait_sdma_drain(&qp->s_iowait);
- flush_tx_list(qp);
- remove_qp(dev, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_lock);
- clear_mr_refs(qp, 1);
- clear_ahg(qp);
- }
- spin_unlock(&qp->s_lock);
- spin_unlock_irq(&qp->r_lock);
-
- /* all user's cleaned up, mark it available */
- free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
- spin_lock(&dev->n_qps_lock);
- dev->n_qps_allocated--;
- spin_unlock(&dev->n_qps_lock);
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- if (qp->ip)
- kref_put(&qp->ip->ref, hfi1_release_mmap_info);
- else
- vfree(qp->r_rq.wq);
- vfree(qp->s_wq);
- kfree(qp->s_hdr);
- kfree(qp);
- return 0;
+ iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+ priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)));
}
-/**
- * init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt)
+static void qp_pio_drain(struct rvt_qp *qp)
{
- u32 offset, qpn, i;
- struct qpn_map *map;
- int ret = 0;
+ struct hfi1_ibdev *dev;
+ struct hfi1_qp_priv *priv = qp->priv;
- spin_lock_init(&qpt->lock);
-
- qpt->last = 0;
- qpt->incr = 1 << dd->qos_shift;
-
- /* insure we don't assign QPs from KDETH 64K window */
- qpn = kdeth_qp << 16;
- qpt->nmaps = qpn / BITS_PER_PAGE;
- /* This should always be zero */
- offset = qpn & BITS_PER_PAGE_MASK;
- map = &qpt->map[qpt->nmaps];
- dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n",
- qpn, qpn + 65535);
- for (i = 0; i < 65536; i++) {
- if (!map->page) {
- get_map_page(qpt, map);
- if (!map->page) {
- ret = -ENOMEM;
- break;
- }
- }
- set_bit(offset, map->page);
- offset++;
- if (offset == BITS_PER_PAGE) {
- /* next page */
- qpt->nmaps++;
- map++;
- offset = 0;
- }
+ if (!priv->s_sendcontext)
+ return;
+ dev = to_idev(qp->ibqp.device);
+ while (iowait_pio_pending(&priv->s_iowait)) {
+ write_seqlock_irq(&dev->iowait_lock);
+ hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
+ write_sequnlock_irq(&dev->iowait_lock);
+ iowait_pio_drain(&priv->s_iowait);
+ write_seqlock_irq(&dev->iowait_lock);
+ hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
+ write_sequnlock_irq(&dev->iowait_lock);
}
- return ret;
}
/**
- * free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
+ * hfi1_schedule_send - schedule progress
+ * @qp: the QP
+ *
+ * This schedules qp progress and caller should hold
+ * the s_lock.
*/
-static void free_qpn_table(struct hfi1_qpn_table *qpt)
+void hfi1_schedule_send(struct rvt_qp *qp)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
- free_page((unsigned long) qpt->map[i].page);
+ if (hfi1_send_ok(qp))
+ _hfi1_schedule_send(qp);
}
/**
@@ -1368,7 +400,7 @@ static void free_qpn_table(struct hfi1_qpn_table *qpt)
*
* The QP s_lock should be held.
*/
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
@@ -1378,27 +410,27 @@ void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
* honor the credit field.
*/
if (credit == HFI1_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= HFI1_S_UNLIMITED_CREDIT;
- if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+ qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
hfi1_schedule_send(qp);
}
}
- } else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
+ } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
/* Compute new LSN (i.e., MSN + credit) */
credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
if (cmp_msn(credit, qp->s_lsn) > 0) {
qp->s_lsn = credit;
- if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
hfi1_schedule_send(qp);
}
}
}
}
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag)
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
unsigned long flags;
@@ -1421,16 +453,17 @@ static int iowait_sleep(
unsigned seq)
{
struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
- struct hfi1_qp *qp;
+ struct rvt_qp *qp;
+ struct hfi1_qp_priv *priv;
unsigned long flags;
int ret = 0;
struct hfi1_ibdev *dev;
qp = tx->qp;
+ priv = qp->priv;
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
-
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
/*
* If we couldn't queue the DMA request, save the info
* and try again later rather than destroying the
@@ -1442,18 +475,18 @@ static int iowait_sleep(
write_seqlock(&dev->iowait_lock);
if (sdma_progress(sde, seq, stx))
goto eagain;
- if (list_empty(&qp->s_iowait.list)) {
+ if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
- ibp->n_dmawait++;
- qp->s_flags |= HFI1_S_WAIT_DMA_DESC;
- list_add_tail(&qp->s_iowait.list, &sde->dmawait);
- trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC);
+ ibp->rvp.n_dmawait++;
+ qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+ list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+ trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
atomic_inc(&qp->refcount);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~HFI1_S_BUSY;
+ qp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
} else {
@@ -1470,61 +503,25 @@ eagain:
static void iowait_wakeup(struct iowait *wait, int reason)
{
- struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
+ struct rvt_qp *qp = iowait_to_qp(wait);
WARN_ON(reason != SDMA_AVAIL_REASON);
- hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC);
+ hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}
-int hfi1_qp_init(struct hfi1_ibdev *dev)
+static void iowait_sdma_drained(struct iowait *wait)
{
- struct hfi1_devdata *dd = dd_from_dev(dev);
- int i;
- int ret = -ENOMEM;
-
- /* allocate parent object */
- dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL);
- if (!dev->qp_dev)
- goto nomem;
- /* allocate hash table */
- dev->qp_dev->qp_table_size = hfi1_qp_table_size;
- dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size);
- dev->qp_dev->qp_table =
- kmalloc(dev->qp_dev->qp_table_size *
- sizeof(*dev->qp_dev->qp_table),
- GFP_KERNEL);
- if (!dev->qp_dev->qp_table)
- goto nomem;
- for (i = 0; i < dev->qp_dev->qp_table_size; i++)
- RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL);
- spin_lock_init(&dev->qp_dev->qpt_lock);
- /* initialize qpn map */
- ret = init_qpn_table(dd, &dev->qp_dev->qpn_table);
- if (ret)
- goto nomem;
- return ret;
-nomem:
- if (dev->qp_dev) {
- kfree(dev->qp_dev->qp_table);
- free_qpn_table(&dev->qp_dev->qpn_table);
- kfree(dev->qp_dev);
- }
- return ret;
-}
+ struct rvt_qp *qp = iowait_to_qp(wait);
-void hfi1_qp_exit(struct hfi1_ibdev *dev)
-{
- struct hfi1_devdata *dd = dd_from_dev(dev);
- u32 qps_inuse;
-
- qps_inuse = free_all_qps(dd);
- if (qps_inuse)
- dd_dev_err(dd, "QP memory leak! %u still in use\n",
- qps_inuse);
- if (dev->qp_dev) {
- kfree(dev->qp_dev->qp_table);
- free_qpn_table(&dev->qp_dev->qpn_table);
- kfree(dev->qp_dev);
+ /*
+ * This happens when the send engine notes
+ * a QP in the error state and cannot
+ * do the flush work until that QP's
+ * sdma work has finished.
+ */
+ if (qp->s_flags & RVT_S_WAIT_DMA) {
+ qp->s_flags &= ~RVT_S_WAIT_DMA;
+ hfi1_schedule_send(qp);
}
}
@@ -1537,7 +534,7 @@ void hfi1_qp_exit(struct hfi1_ibdev *dev)
* Return:
* A send engine for the qp or NULL for SMI type qp.
*/
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct sdma_engine *sde;
@@ -1554,9 +551,33 @@ struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
return sde;
}
+/*
+ * qp_to_send_context - map a qp to a send context
+ * @qp: the QP
+ * @sc5: the 5 bit sc
+ *
+ * Return:
+ * A send context for the qp
+ */
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_SMI:
+ /* SMA packets to VL15 */
+ return dd->vld[15].sc;
+ default:
+ break;
+ }
+
+ return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
+ sc5);
+}
+
struct qp_iter {
struct hfi1_ibdev *dev;
- struct hfi1_qp *qp;
+ struct rvt_qp *qp;
int specials;
int n;
};
@@ -1570,7 +591,7 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
return NULL;
iter->dev = dev;
- iter->specials = dev->ibdev.phys_port_cnt * 2;
+ iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
if (qp_iter_next(iter)) {
kfree(iter);
return NULL;
@@ -1584,8 +605,8 @@ int qp_iter_next(struct qp_iter *iter)
struct hfi1_ibdev *dev = iter->dev;
int n = iter->n;
int ret = 1;
- struct hfi1_qp *pqp = iter->qp;
- struct hfi1_qp *qp;
+ struct rvt_qp *pqp = iter->qp;
+ struct rvt_qp *qp;
/*
* The approach is to consider the special qps
@@ -1597,11 +618,11 @@ int qp_iter_next(struct qp_iter *iter)
*
* n = 0..iter->specials is the special qp indices
*
- * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials are
+ * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
* the potential hash bucket entries
*
*/
- for (; n < dev->qp_dev->qp_table_size + iter->specials; n++) {
+ for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
if (pqp) {
qp = rcu_dereference(pqp->next);
} else {
@@ -1610,17 +631,17 @@ int qp_iter_next(struct qp_iter *iter)
struct hfi1_ibport *ibp;
int pidx;
- pidx = n % dev->ibdev.phys_port_cnt;
+ pidx = n % dev->rdi.ibdev.phys_port_cnt;
ppd = &dd_from_dev(dev)->pport[pidx];
ibp = &ppd->ibport_data;
if (!(n & 1))
- qp = rcu_dereference(ibp->qp[0]);
+ qp = rcu_dereference(ibp->rvp.qp[0]);
else
- qp = rcu_dereference(ibp->qp[1]);
+ qp = rcu_dereference(ibp->rvp.qp[1]);
} else {
qp = rcu_dereference(
- dev->qp_dev->qp_table[
+ dev->rdi.qp_dev->qp_table[
(n - iter->specials)]);
}
}
@@ -1638,7 +659,7 @@ static const char * const qp_type_str[] = {
"SMI", "GSI", "RC", "UC", "UD",
};
-static int qp_idle(struct hfi1_qp *qp)
+static int qp_idle(struct rvt_qp *qp)
{
return
qp->s_last == qp->s_acked &&
@@ -1649,14 +670,17 @@ static int qp_idle(struct hfi1_qp *qp)
void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
- struct hfi1_swqe *wqe;
- struct hfi1_qp *qp = iter->qp;
+ struct rvt_swqe *wqe;
+ struct rvt_qp *qp = iter->qp;
+ struct hfi1_qp_priv *priv = qp->priv;
struct sdma_engine *sde;
+ struct send_context *send_context;
- sde = qp_to_sdma_engine(qp, qp->s_sc);
- wqe = get_swqe_ptr(qp, qp->s_last);
+ sde = qp_to_sdma_engine(qp, priv->s_sc);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+ send_context = qp_to_send_context(qp, priv->s_sc);
seq_printf(s,
- "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n",
+ "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@@ -1666,8 +690,9 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
wqe ? wqe->wr.opcode : 0,
qp->s_hdrwords,
qp->s_flags,
- atomic_read(&qp->s_iowait.sdma_busy),
- !list_empty(&qp->s_iowait.list),
+ iowait_sdma_pending(&priv->s_iowait),
+ iowait_pio_pending(&priv->s_iowait),
+ !list_empty(&priv->s_iowait.list),
qp->timeout,
wqe ? wqe->ssn : 0,
qp->s_lsn,
@@ -1676,20 +701,26 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_sending_psn, qp->s_sending_hpsn,
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
+ qp->s_avail,
qp->remote_qpn,
qp->remote_ah_attr.dlid,
qp->remote_ah_attr.sl,
qp->pmtu,
+ qp->s_retry,
qp->s_retry_cnt,
- qp->timeout,
qp->s_rnr_retry_cnt,
sde,
- sde ? sde->this_idx : 0);
+ sde ? sde->this_idx : 0,
+ send_context,
+ send_context ? send_context->sw_index : 0,
+ ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
+ ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+ qp->pid);
}
-void qp_comm_est(struct hfi1_qp *qp)
+void qp_comm_est(struct rvt_qp *qp)
{
- qp->r_flags |= HFI1_R_COMM_EST;
+ qp->r_flags |= RVT_R_COMM_EST;
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -1700,24 +731,241 @@ void qp_comm_est(struct hfi1_qp *qp)
}
}
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ gfp_t gfp)
+{
+ struct hfi1_qp_priv *priv;
+
+ priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ priv->owner = qp;
+
+ priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
+ if (!priv->s_hdr) {
+ kfree(priv);
+ return ERR_PTR(-ENOMEM);
+ }
+ setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
+ qp->s_timer.function = hfi1_rc_timeout;
+ return priv;
+}
+
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ kfree(priv->s_hdr);
+ kfree(priv);
+}
+
+unsigned free_all_qps(struct rvt_dev_info *rdi)
+{
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ int n;
+ unsigned qp_inuse = 0;
+
+ for (n = 0; n < dd->num_pports; n++) {
+ struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
+
+ rcu_read_lock();
+ if (rcu_dereference(ibp->rvp.qp[0]))
+ qp_inuse++;
+ if (rcu_dereference(ibp->rvp.qp[1]))
+ qp_inuse++;
+ rcu_read_unlock();
+ }
+
+ return qp_inuse;
+}
+
+void flush_qp_waiters(struct rvt_qp *qp)
+{
+ flush_iowait(qp);
+ hfi1_stop_rc_timers(qp);
+}
+
+void stop_send_queue(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ cancel_work_sync(&priv->s_iowait.iowork);
+ hfi1_del_timers_sync(qp);
+}
+
+void quiesce_qp(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ iowait_sdma_drain(&priv->s_iowait);
+ qp_pio_drain(qp);
+ flush_tx_list(qp);
+}
+
+void notify_qp_reset(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ iowait_init(
+ &priv->s_iowait,
+ 1,
+ _hfi1_do_send,
+ iowait_sleep,
+ iowait_wakeup,
+ iowait_sdma_drained);
+ priv->r_adefered = 0;
+ clear_ahg(qp);
+}
+
/*
* Switch to alternate path.
* The QP s_lock should be held and interrupts disabled.
*/
-void hfi1_migrate_qp(struct hfi1_qp *qp)
+void hfi1_migrate_qp(struct rvt_qp *qp)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct ib_event ev;
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
qp->port_num = qp->alt_ah_attr.port_num;
qp->s_pkey_index = qp->s_alt_pkey_index;
- qp->s_flags |= HFI1_S_AHG_CLEAR;
- qp->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
- qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
+ qp->s_flags |= RVT_S_AHG_CLEAR;
+ priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
+ priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
ev.event = IB_EVENT_PATH_MIG;
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
+
+int mtu_to_path_mtu(u32 mtu)
+{
+ return mtu_to_enum(mtu, OPA_MTU_8192);
+}
+
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+ u32 mtu;
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ struct hfi1_ibport *ibp;
+ u8 sc, vl;
+
+ ibp = &dd->pport[qp->port_num - 1].ibport_data;
+ sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ vl = sc_to_vlt(dd, sc);
+
+ mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
+ if (vl < PER_VL_SEND_CONTEXTS)
+ mtu = min_t(u32, mtu, dd->vld[vl].mtu);
+ return mtu;
+}
+
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_attr *attr)
+{
+ int mtu, pidx = qp->port_num - 1;
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
+ if (mtu == -1)
+ return -1; /* values less than 0 are error */
+
+ if (mtu > dd->pport[pidx].ibmtu)
+ return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
+ else
+ return attr->path_mtu;
+}
+
+void notify_error_qp(struct rvt_qp *qp)
+{
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ write_seqlock(&dev->iowait_lock);
+ if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
+ qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+ list_del_init(&priv->s_iowait.list);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+ write_sequnlock(&dev->iowait_lock);
+
+ if (!(qp->s_flags & RVT_S_BUSY)) {
+ qp->s_hdrwords = 0;
+ if (qp->s_rdma_mr) {
+ rvt_put_mr(qp->s_rdma_mr);
+ qp->s_rdma_mr = NULL;
+ }
+ flush_tx_list(qp);
+ }
+}
+
+/**
+ * hfi1_error_port_qps - put a port's RC/UC qps into error state
+ * @ibp: the ibport.
+ * @sl: the service level.
+ *
+ * This function places all RC/UC qps with a given service level into error
+ * state. It is generally called to force upper lay apps to abandon stale qps
+ * after an sl->sc mapping change.
+ */
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
+{
+ struct rvt_qp *qp = NULL;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
+ int n;
+ int lastwqe;
+ struct ib_event ev;
+
+ rcu_read_lock();
+
+ /* Deal only with RC/UC qps that use the given SL. */
+ for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
+ for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next)) {
+ if (qp->port_num == ppd->port &&
+ (qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_RC) &&
+ qp->remote_ah_attr.sl == sl &&
+ (ib_rvt_state_ops[qp->state] &
+ RVT_POST_SEND_OK)) {
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+ lastwqe = rvt_error_qp(qp,
+ IB_WC_WR_FLUSH_ERR);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+ if (lastwqe) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event =
+ IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev,
+ qp->ibqp.qp_context);
+ }
+ }
+ }
+ }
+
+ rcu_read_unlock();
+}
diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h
index 62a94c5d7dca..e7bc8d6cf681 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/staging/rdma/hfi1/qp.h
@@ -1,14 +1,13 @@
#ifndef _QP_H
#define _QP_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -51,119 +48,33 @@
*/
#include <linux/hash.h>
+#include <rdma/rdmavt_qp.h>
#include "verbs.h"
#include "sdma.h"
-#define QPN_MAX (1 << 24)
-#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+extern unsigned int hfi1_qp_table_size;
/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
- void *page;
-};
-
-struct hfi1_qpn_table {
- spinlock_t lock; /* protect changes in this struct */
- unsigned flags; /* flags for QP0/1 allocated for each port */
- u32 last; /* last QP number allocated */
- u32 nmaps; /* size of the map table */
- u16 limit;
- u8 incr;
- /* bit map of free QP numbers other than 0/1 */
- struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct hfi1_qp_ibdev {
- u32 qp_table_size;
- u32 qp_table_bits;
- struct hfi1_qp __rcu **qp_table;
- spinlock_t qpt_lock;
- struct hfi1_qpn_table qpn_table;
-};
-
-static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn)
-{
- return hash_32(qpn, dev->qp_table_bits);
-}
-
-/**
- * hfi1_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
+ * free_ahg - clear ahg from QP
*/
-static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp,
- u32 qpn) __must_hold(RCU)
+static inline void clear_ahg(struct rvt_qp *qp)
{
- struct hfi1_qp *qp = NULL;
-
- if (unlikely(qpn <= 1)) {
- qp = rcu_dereference(ibp->qp[qpn]);
- } else {
- struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
- u32 n = qpn_hash(dev->qp_dev, qpn);
-
- for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp;
- qp = rcu_dereference(qp->next))
- if (qp->ibqp.qp_num == qpn)
- break;
- }
- return qp;
-}
+ struct hfi1_qp_priv *priv = qp->priv;
-/**
- * clear_ahg - reset ahg status in qp
- * @qp - qp pointer
- */
-static inline void clear_ahg(struct hfi1_qp *qp)
-{
- qp->s_hdr->ahgcount = 0;
- qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
- if (qp->s_sde && qp->s_ahgidx >= 0)
- sdma_ahg_free(qp->s_sde, qp->s_ahgidx);
+ priv->s_hdr->ahgcount = 0;
+ qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+ if (priv->s_sde && qp->s_ahgidx >= 0)
+ sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
qp->s_ahgidx = -1;
}
/**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err);
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata);
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_qp_init_attr *init_attr);
-
-/**
* hfi1_compute_aeth - compute the AETH (syndrome + MSN)
* @qp: the queue pair to compute the AETH for
*
* Returns the AETH.
*/
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp);
+__be32 hfi1_compute_aeth(struct rvt_qp *qp);
/**
* hfi1_create_qp - create a queue pair for a device
@@ -179,45 +90,23 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
/**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int hfi1_destroy_qp(struct ib_qp *ibqp);
-
-/**
* hfi1_get_credit - flush the send work queue of a QP
* @qp: the qp who's send work queue to flush
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
*/
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth);
-
-/**
- * hfi1_qp_init - allocate QP tables
- * @dev: a pointer to the hfi1_ibdev
- */
-int hfi1_qp_init(struct hfi1_ibdev *dev);
-
-/**
- * hfi1_qp_exit - free the QP related structures
- * @dev: a pointer to the hfi1_ibdev
- */
-void hfi1_qp_exit(struct hfi1_ibdev *dev);
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
*/
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag);
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag);
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5);
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5);
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
struct qp_iter;
@@ -244,43 +133,28 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
* qp_comm_est - handle trap with QP established
* @qp: the QP
*/
-void qp_comm_est(struct hfi1_qp *qp);
+void qp_comm_est(struct rvt_qp *qp);
-/**
- * _hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress w/o regard to the s_flags.
- *
- * It is only used in the post send, which doesn't hold
- * the s_lock.
- */
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp)
-{
- struct hfi1_ibport *ibp =
- to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+void _hfi1_schedule_send(struct rvt_qp *qp);
+void hfi1_schedule_send(struct rvt_qp *qp);
- iowait_schedule(&qp->s_iowait, ppd->hfi1_wq,
- qp->s_sde ?
- qp->s_sde->cpu :
- cpumask_first(cpumask_of_node(dd->assigned_node_id)));
-}
-
-/**
- * hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress and caller should hold
- * the s_lock.
- */
-static inline void hfi1_schedule_send(struct hfi1_qp *qp)
-{
- if (hfi1_send_ok(qp))
- _hfi1_schedule_send(qp);
-}
-
-void hfi1_migrate_qp(struct hfi1_qp *qp);
+void hfi1_migrate_qp(struct rvt_qp *qp);
+/*
+ * Functions provided by hfi1 driver for rdmavt to use
+ */
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ gfp_t gfp);
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+unsigned free_all_qps(struct rvt_dev_info *rdi);
+void notify_qp_reset(struct rvt_qp *qp);
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_attr *attr);
+void flush_qp_waiters(struct rvt_qp *qp);
+void notify_error_qp(struct rvt_qp *qp);
+void stop_send_queue(struct rvt_qp *qp);
+void quiesce_qp(struct rvt_qp *qp);
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+int mtu_to_path_mtu(u32 mtu);
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
#endif /* _QP_H */
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c
index 6326a915d7fd..9ed1963010fe 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/staging/rdma/hfi1/qsfp.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -62,7 +59,7 @@
#define I2C_MAX_RETRY 4
/*
- * Unlocked i2c write. Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c write. No set-up or lock checking.
*/
static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len)
@@ -71,14 +68,6 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int ret, cnt;
u8 *buff = bp;
- /* Make sure TWSI bus is in sane state. */
- ret = hfi1_twsi_reset(dd, target);
- if (ret) {
- hfi1_dev_porterr(dd, ppd->port,
- "I2C interface Reset for write failed\n");
- return -EIO;
- }
-
cnt = 0;
while (cnt < len) {
int wlen = len - cnt;
@@ -99,48 +88,45 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
return cnt;
}
+/*
+ * Caller must hold the i2c chain resource.
+ */
int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
{
- struct hfi1_devdata *dd = ppd->dd;
int ret;
- ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
- if (!ret) {
- ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
- mutex_unlock(&dd->qsfp_i2c_mutex);
+ if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ return -EACCES;
+
+ /* make sure the TWSI bus is in a sane state */
+ ret = hfi1_twsi_reset(ppd->dd, target);
+ if (ret) {
+ hfi1_dev_porterr(ppd->dd, ppd->port,
+ "I2C chain %d write interface reset failed\n",
+ target);
+ return ret;
}
- return ret;
+ return __i2c_write(ppd, target, i2c_addr, offset, bp, len);
}
/*
- * Unlocked i2c read. Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c read. No set-up or lock checking.
*/
static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
int ret, cnt, pass = 0;
- int stuck = 0;
- u8 *buff = bp;
-
- /* Make sure TWSI bus is in sane state. */
- ret = hfi1_twsi_reset(dd, target);
- if (ret) {
- hfi1_dev_porterr(dd, ppd->port,
- "I2C interface Reset for read failed\n");
- ret = -EIO;
- stuck = 1;
- goto exit;
- }
+ int orig_offset = offset;
cnt = 0;
while (cnt < len) {
int rlen = len - cnt;
ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
- buff + cnt, rlen);
+ bp + cnt, rlen);
/* Some QSFP's fail first try. Retry as experiment */
if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
continue;
@@ -156,14 +142,11 @@ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
ret = cnt;
exit:
- if (stuck)
- dd_dev_err(dd, "I2C interface bus stuck non-idle\n");
-
- if (pass >= I2C_MAX_RETRY && ret)
+ if (ret < 0) {
hfi1_dev_porterr(dd, ppd->port,
- "I2C failed even retrying\n");
- else if (pass)
- hfi1_dev_porterr(dd, ppd->port, "I2C retries: %d\n", pass);
+ "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n",
+ target, i2c_addr, orig_offset, len);
+ }
/* Must wait min 20us between qsfp i2c transactions */
udelay(20);
@@ -171,21 +154,35 @@ exit:
return ret;
}
+/*
+ * Caller must hold the i2c chain resource.
+ */
int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
{
- struct hfi1_devdata *dd = ppd->dd;
int ret;
- ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
- if (!ret) {
- ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
- mutex_unlock(&dd->qsfp_i2c_mutex);
+ if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ return -EACCES;
+
+ /* make sure the TWSI bus is in a sane state */
+ ret = hfi1_twsi_reset(ppd->dd, target);
+ if (ret) {
+ hfi1_dev_porterr(ppd->dd, ppd->port,
+ "I2C chain %d read interface reset failed\n",
+ target);
+ return ret;
}
- return ret;
+ return __i2c_read(ppd, target, i2c_addr, offset, bp, len);
}
+/*
+ * Write page n, offset m of QSFP memory as defined by SFF 8636
+ * by writing @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
{
@@ -195,50 +192,81 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int ret;
u8 page;
- ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
- if (ret)
+ if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ return -EACCES;
+
+ /* make sure the TWSI bus is in a sane state */
+ ret = hfi1_twsi_reset(ppd->dd, target);
+ if (ret) {
+ hfi1_dev_porterr(ppd->dd, ppd->port,
+ "QSFP chain %d write interface reset failed\n",
+ target);
return ret;
+ }
while (count < len) {
/*
- * Set the qsfp page based on a zero-based addresss
+ * Set the qsfp page based on a zero-based address
* and a page size of QSFP_PAGESIZE bytes.
*/
page = (u8)(addr / QSFP_PAGESIZE);
- ret = __i2c_write(ppd, target, QSFP_DEV,
- QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+ ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+ QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
if (ret != 1) {
- hfi1_dev_porterr(
- ppd->dd,
- ppd->port,
- "can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+ hfi1_dev_porterr(ppd->dd, ppd->port,
+ "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+ target, ret);
ret = -EIO;
break;
}
- /* truncate write to end of page if crossing page boundary */
offset = addr % QSFP_PAGESIZE;
nwrite = len - count;
- if ((offset + nwrite) > QSFP_PAGESIZE)
- nwrite = QSFP_PAGESIZE - offset;
+ /* truncate write to boundary if crossing boundary */
+ if (((addr % QSFP_RW_BOUNDARY) + nwrite) > QSFP_RW_BOUNDARY)
+ nwrite = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
- ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count,
- nwrite);
- if (ret <= 0) /* stop on error or nothing read */
+ ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+ offset, bp + count, nwrite);
+ if (ret <= 0) /* stop on error or nothing written */
break;
count += ret;
addr += ret;
}
- mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
if (ret < 0)
return ret;
return count;
}
+/*
+ * Perform a stand-alone single QSFP write. Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+ int len)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ u32 resource = qsfp_resource(dd);
+ int ret;
+
+ ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+ if (ret)
+ return ret;
+ ret = qsfp_write(ppd, target, addr, bp, len);
+ release_chip_resource(dd, resource);
+
+ return ret;
+}
+
+/*
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * by reading @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
{
@@ -248,9 +276,17 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int ret;
u8 page;
- ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
- if (ret)
+ if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+ return -EACCES;
+
+ /* make sure the TWSI bus is in a sane state */
+ ret = hfi1_twsi_reset(ppd->dd, target);
+ if (ret) {
+ hfi1_dev_porterr(ppd->dd, ppd->port,
+ "QSFP chain %d read interface reset failed\n",
+ target);
return ret;
+ }
while (count < len) {
/*
@@ -258,25 +294,26 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
* and a page size of QSFP_PAGESIZE bytes.
*/
page = (u8)(addr / QSFP_PAGESIZE);
- ret = __i2c_write(ppd, target, QSFP_DEV,
- QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+ ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+ QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
if (ret != 1) {
- hfi1_dev_porterr(
- ppd->dd,
- ppd->port,
- "can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+ hfi1_dev_porterr(ppd->dd, ppd->port,
+ "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+ target, ret);
ret = -EIO;
break;
}
- /* truncate read to end of page if crossing page boundary */
offset = addr % QSFP_PAGESIZE;
nread = len - count;
- if ((offset + nread) > QSFP_PAGESIZE)
- nread = QSFP_PAGESIZE - offset;
-
- ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count,
- nread);
+ /* truncate read to boundary if crossing boundary */
+ if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
+ nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
+
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+ offset, bp + count, nread);
if (ret <= 0) /* stop on error or nothing read */
break;
@@ -284,17 +321,40 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
addr += ret;
}
- mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
if (ret < 0)
return ret;
return count;
}
/*
+ * Perform a stand-alone single QSFP read. Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+ int len)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ u32 resource = qsfp_resource(dd);
+ int ret;
+
+ ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+ if (ret)
+ return ret;
+ ret = qsfp_read(ppd, target, addr, bp, len);
+ release_chip_resource(dd, resource);
+
+ return ret;
+}
+
+/*
* This function caches the QSFP memory range in 128 byte chunks.
* As an example, the next byte after address 255 is byte 128 from
* upper page 01H (if existing) rather than byte 0 from lower page 00H.
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * in the cache by reading byte ((128 * n) + m)
+ * The calls to qsfp_{read,write} in this function correctly handle the
+ * address map difference between this mapping and the mapping implemented
+ * by those functions
*/
int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
{
@@ -304,79 +364,84 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
u8 *cache = &cp->cache[0];
/* ensure sane contents on invalid reads, for cable swaps */
- memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
- dd_dev_info(ppd->dd, "%s: called\n", __func__);
+ memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
+ spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
+ ppd->qsfp_info.cache_valid = 0;
+ spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
+
if (!qsfp_mod_present(ppd)) {
ret = -ENODEV;
- goto bail;
+ goto bail_no_release;
}
- ret = qsfp_read(ppd, target, 0, cache, 256);
- if (ret != 256) {
+ ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+ if (ret)
+ goto bail_no_release;
+
+ ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
+ if (ret != QSFP_PAGESIZE) {
dd_dev_info(ppd->dd,
- "%s: Read of pages 00H failed, expected 256, got %d\n",
- __func__, ret);
+ "%s: Page 0 read failed, expected %d, got %d\n",
+ __func__, QSFP_PAGESIZE, ret);
goto bail;
}
- if (cache[0] != 0x0C && cache[0] != 0x0D)
- goto bail;
-
/* Is paging enabled? */
if (!(cache[2] & 4)) {
-
/* Paging enabled, page 03 required */
if ((cache[195] & 0xC0) == 0xC0) {
/* all */
ret = qsfp_read(ppd, target, 384, cache + 256, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 640, cache + 384, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
} else if ((cache[195] & 0x80) == 0x80) {
/* only page 2 and 3 */
ret = qsfp_read(ppd, target, 640, cache + 384, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
} else if ((cache[195] & 0x40) == 0x40) {
/* only page 1 and 3 */
ret = qsfp_read(ppd, target, 384, cache + 256, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
} else {
/* only page 3 */
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
- dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+ dd_dev_info(ppd->dd, "%s failed\n", __func__);
goto bail;
}
}
}
+ release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.cache_valid = 1;
ppd->qsfp_info.cache_refresh_required = 0;
@@ -385,7 +450,9 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
return 0;
bail:
- memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
+ release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+bail_no_release:
+ memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
return ret;
}
@@ -434,7 +501,7 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
if (port_num > dd->num_pports || port_num < 1) {
dd_dev_info(dd, "%s: Invalid port number %d\n",
- __func__, port_num);
+ __func__, port_num);
ret = -EINVAL;
goto set_zeroes;
}
@@ -485,7 +552,6 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
lenstr[1] = '\0';
if (ppd->qsfp_info.cache_valid) {
-
if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
@@ -529,7 +595,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
memcpy(bin_buff, &cache[bidx], QSFP_DUMP_CHUNK);
for (iidx = 0; iidx < QSFP_DUMP_CHUNK; ++iidx) {
- sofar += scnprintf(buf + sofar, len-sofar,
+ sofar += scnprintf(buf + sofar, len - sofar,
" %02X", bin_buff[iidx]);
}
sofar += scnprintf(buf + sofar, len - sofar, "\n");
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h
index d30c2a6baa0b..831fe4cf1345 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/staging/rdma/hfi1/qsfp.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -59,23 +56,28 @@
* Below are masks for QSFP pins. Pins are the same for HFI0 and HFI1.
* _N means asserted low
*/
-#define QSFP_HFI0_I2CCLK (1 << 0)
-#define QSFP_HFI0_I2CDAT (1 << 1)
-#define QSFP_HFI0_RESET_N (1 << 2)
-#define QSFP_HFI0_INT_N (1 << 3)
-#define QSFP_HFI0_MODPRST_N (1 << 4)
+#define QSFP_HFI0_I2CCLK BIT(0)
+#define QSFP_HFI0_I2CDAT BIT(1)
+#define QSFP_HFI0_RESET_N BIT(2)
+#define QSFP_HFI0_INT_N BIT(3)
+#define QSFP_HFI0_MODPRST_N BIT(4)
/* QSFP is paged at 256 bytes */
#define QSFP_PAGESIZE 256
+/* Reads/writes cannot cross 128 byte boundaries */
+#define QSFP_RW_BOUNDARY 128
+
+/* number of bytes in i2c offset for QSFP devices */
+#define __QSFP_OFFSET_SIZE 1 /* num address bytes */
+#define QSFP_OFFSET_SIZE (__QSFP_OFFSET_SIZE << 8) /* shifted value */
/* Defined fields that Intel requires of qualified cables */
/* Byte 0 is Identifier, not checked */
/* Byte 1 is reserved "status MSB" */
-/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
-/*
- * Rest of first 128 not used, although 127 is reserved for page select
- * if module is not "Flat memory".
- */
+#define QSFP_TX_CTRL_BYTE_OFFS 86
+#define QSFP_PWR_CTRL_BYTE_OFFS 93
+#define QSFP_CDR_CTRL_BYTE_OFFS 98
+
#define QSFP_PAGE_SELECT_BYTE_OFFS 127
/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
#define QSFP_MOD_ID_OFFS 128
@@ -87,7 +89,8 @@
/* Byte 130 is Connector type. Not Intel req'd */
/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
/* Byte 139 is encoding. code 0x01 is 8b10b. Not Intel req'd */
-/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not Intel req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec */
+#define QSFP_NOM_BIT_RATE_100_OFFS 140
/* Byte 141 is Extended Rate Select. Not Intel req'd */
/* Bytes 142..145 are lengths for various fiber types. Not Intel req'd */
/* Byte 146 is length for Copper. Units of 1 meter */
@@ -135,11 +138,18 @@ extern const char *const hfi1_qsfp_devtech[16];
*/
#define QSFP_ATTEN_OFFS 186
#define QSFP_ATTEN_LEN 2
-/* Bytes 188,189 are Wavelength tolerance, not Intel req'd */
+/*
+ * Bytes 188,189 are Wavelength tolerance, if optical
+ * If copper, they are attenuation in dB:
+ * Byte 188 is at 12.5 Gb/s, Byte 189 at 25 Gb/s
+ */
+#define QSFP_CU_ATTEN_7G_OFFS 188
+#define QSFP_CU_ATTEN_12G_OFFS 189
/* Byte 190 is Max Case Temp. Not Intel req'd */
/* Byte 191 is LSB of sum of bytes 128..190. Not Intel req'd */
#define QSFP_CC_OFFS 191
-/* Bytes 192..195 are Options implemented in qsfp. Not Intel req'd */
+#define QSFP_EQ_INFO_OFFS 193
+#define QSFP_CDR_INFO_OFFS 194
/* Bytes 196..211 are Serial Number, String */
#define QSFP_SN_OFFS 196
#define QSFP_SN_LEN 16
@@ -150,6 +160,8 @@ extern const char *const hfi1_qsfp_devtech[16];
#define QSFP_LOT_OFFS 218
#define QSFP_LOT_LEN 2
/* Bytes 220, 221 indicate monitoring options, Not Intel req'd */
+/* Byte 222 indicates nominal bitrate in units of 250Mbits/sec */
+#define QSFP_NOM_BIT_RATE_250_OFFS 222
/* Byte 223 is LSB of sum of bytes 192..222 */
#define QSFP_CC_EXT_OFFS 223
@@ -191,6 +203,7 @@ extern const char *const hfi1_qsfp_devtech[16];
*/
#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
@@ -198,10 +211,12 @@ struct qsfp_data {
/* Helps to find our way */
struct hfi1_pportdata *ppd;
struct work_struct qsfp_work;
- u8 cache[QSFP_MAX_NUM_PAGES*128];
+ u8 cache[QSFP_MAX_NUM_PAGES * 128];
+ /* protect qsfp data */
spinlock_t qsfp_lock;
u8 check_interrupt_flags;
- u8 qsfp_interrupt_functional;
+ u8 reset_needed;
+ u8 limiting_active;
u8 cache_valid;
u8 cache_refresh_required;
};
@@ -220,3 +235,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+ int len);
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+ int len);
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 6f4a155f7931..0d7e1017f3cb 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -49,18 +46,151 @@
*/
#include <linux/io.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
#include "hfi.h"
#include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
#include "trace.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
-static void rc_timeout(unsigned long arg);
+/**
+ * hfi1_add_retry_timer - add/start a retry timer
+ * @qp - the QP
+ *
+ * add a retry timer on the QP
+ */
+static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ qp->s_flags |= RVT_S_TIMER;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ qp->s_timer.expires = jiffies + qp->timeout_jiffies +
+ rdi->busy_jiffies;
+ add_timer(&qp->s_timer);
+}
+
+/**
+ * hfi1_add_rnr_timer - add/start an rnr timer
+ * @qp - the QP
+ * @to - timeout in usecs
+ *
+ * add an rnr timer on the QP
+ */
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ qp->s_flags |= RVT_S_WAIT_RNR;
+ qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+ add_timer(&priv->s_rnr_timer);
+}
+
+/**
+ * hfi1_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ *
+ * Modify a potentially already running retry
+ * timer
+ */
+static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ qp->s_flags |= RVT_S_TIMER;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
+ rdi->busy_jiffies);
+}
+
+/**
+ * hfi1_stop_retry_timer - stop a retry timer
+ * @qp - the QP
+ *
+ * stop a retry timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
+{
+ int rval = 0;
+
+ /* Remove QP from retry */
+ if (qp->s_flags & RVT_S_TIMER) {
+ qp->s_flags &= ~RVT_S_TIMER;
+ rval = del_timer(&qp->s_timer);
+ }
+ return rval;
+}
+
+/**
+ * hfi1_stop_rc_timers - stop all timers
+ * @qp - the QP
+ *
+ * stop any pending timers
+ */
+void hfi1_stop_rc_timers(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ /* Remove QP from all timers */
+ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ del_timer(&priv->s_rnr_timer);
+ }
+}
+
+/**
+ * hfi1_stop_rnr_timer - stop an rnr timer
+ * @qp - the QP
+ *
+ * stop an rnr timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
+{
+ int rval = 0;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ /* Remove QP from rnr timer */
+ if (qp->s_flags & RVT_S_WAIT_RNR) {
+ qp->s_flags &= ~RVT_S_WAIT_RNR;
+ rval = del_timer(&priv->s_rnr_timer);
+ }
+ return rval;
+}
+
+/**
+ * hfi1_del_timers_sync - wait for any timeout routines to exit
+ * @qp - the QP
+ */
+void hfi1_del_timers_sync(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ del_timer_sync(&qp->s_timer);
+ del_timer_sync(&priv->s_rnr_timer);
+}
-static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
+/* only opcode mask for adaptive pio */
+const u32 rc_only_opcode =
+ BIT(OP(SEND_ONLY) & 0x1f) |
+ BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+ BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+ BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
+ BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
+ BIT(OP(ACKNOWLEDGE & 0x1f)) |
+ BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
+ BIT(OP(COMPARE_SWAP & 0x1f)) |
+ BIT(OP(FETCH_ADD & 0x1f));
+
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
u32 len;
@@ -74,38 +204,32 @@ static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
return wqe->length - len;
}
-static void start_timer(struct hfi1_qp *qp)
-{
- qp->s_flags |= HFI1_S_TIMER;
- qp->s_timer.function = rc_timeout;
- /* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies;
- add_timer(&qp->s_timer);
-}
-
/**
* make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
* @dev: the device for this QP
* @qp: a pointer to the QP
* @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
+ * @ps: the xmit packet state
*
* Return 1 if constructed; otherwise, return 0.
* Note that we are in the responder's side of the QP context.
* Note the QP s_lock must be held.
*/
-static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
- struct hfi1_other_headers *ohdr, u32 pmtu)
+static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
+ struct hfi1_other_headers *ohdr,
+ struct hfi1_pkt_state *ps)
{
- struct hfi1_ack_entry *e;
+ struct rvt_ack_entry *e;
u32 hwords;
u32 len;
u32 bth0;
u32 bth2;
int middle = 0;
+ u32 pmtu = qp->pmtu;
+ struct hfi1_qp_priv *priv = qp->priv;
/* Don't send an ACK if we aren't supposed to. */
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -116,7 +240,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->rdma_sge.mr) {
- hfi1_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
/* FALLTHROUGH */
@@ -133,7 +257,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
- if (qp->s_flags & HFI1_S_ACK_PENDING)
+ if (qp->s_flags & RVT_S_ACK_PENDING)
goto normal;
goto bail;
}
@@ -152,9 +276,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
goto bail;
}
/* Copy SGE state in case we need to resend */
- qp->s_rdma_mr = e->rdma_sge.mr;
- if (qp->s_rdma_mr)
- hfi1_get_mr(qp->s_rdma_mr);
+ ps->s_txreq->mr = e->rdma_sge.mr;
+ if (ps->s_txreq->mr)
+ rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -191,9 +315,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
qp->s_cur_sge = &qp->s_ack_rdma_sge;
- qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
- if (qp->s_rdma_mr)
- hfi1_get_mr(qp->s_rdma_mr);
+ ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
+ if (ps->s_txreq->mr)
+ rvt_get_mr(ps->s_txreq->mr);
len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu) {
len = pmtu;
@@ -218,7 +342,7 @@ normal:
* (see above).
*/
qp->s_ack_state = OP(SEND_ONLY);
- qp->s_flags &= ~HFI1_S_ACK_PENDING;
+ qp->s_flags &= ~RVT_S_ACK_PENDING;
qp->s_cur_sge = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
@@ -234,20 +358,23 @@ normal:
}
qp->s_rdma_ack_cnt++;
qp->s_hdrwords = hwords;
+ ps->s_txreq->sde = priv->s_sde;
qp->s_cur_size = len;
- hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle);
+ hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
+ /* pbc */
+ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
return 1;
bail:
qp->s_ack_state = OP(ACKNOWLEDGE);
/*
* Ensure s_rdma_ack_cnt changes are committed prior to resetting
- * HFI1_S_RESP_PENDING
+ * RVT_S_RESP_PENDING
*/
smp_wmb();
- qp->s_flags &= ~(HFI1_S_RESP_PENDING
- | HFI1_S_ACK_PENDING
- | HFI1_S_AHG_VALID);
+ qp->s_flags &= ~(RVT_S_RESP_PENDING
+ | RVT_S_ACK_PENDING
+ | RVT_S_AHG_VALID);
return 0;
}
@@ -255,14 +382,17 @@ bail:
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
*
+ * Assumes s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
-int hfi1_make_rc_req(struct hfi1_qp *qp)
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct hfi1_other_headers *ohdr;
- struct hfi1_sge_state *ss;
- struct hfi1_swqe *wqe;
+ struct rvt_sge_state *ss;
+ struct rvt_swqe *wqe;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
u32 hwords = 5;
u32 len;
@@ -270,51 +400,48 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
u32 bth2;
u32 pmtu = qp->pmtu;
char newreq;
- unsigned long flags;
- int ret = 0;
int middle = 0;
int delta;
- ohdr = &qp->s_hdr->ibh.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr->ibh.u.l.oth;
+ ps->s_txreq = get_txreq(ps->dev, qp);
+ if (IS_ERR(ps->s_txreq))
+ goto bail_no_tx;
- /*
- * The lock is needed to synchronize between the sending tasklet,
- * the receive interrupt handler, and timeout re-sends.
- */
- spin_lock_irqsave(&qp->s_lock, flags);
+ ohdr = &ps->s_txreq->phdr.hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Sending responses has higher priority over sending requests. */
- if ((qp->s_flags & HFI1_S_RESP_PENDING) &&
- make_rc_ack(dev, qp, ohdr, pmtu))
- goto done;
+ if ((qp->s_flags & RVT_S_RESP_PENDING) &&
+ make_rc_ack(dev, qp, ohdr, ps))
+ return 1;
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
- if (atomic_read(&qp->s_iowait.sdma_busy)) {
- qp->s_flags |= HFI1_S_WAIT_DMA;
+ if (iowait_sdma_pending(&priv->s_iowait)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
clear_ahg(qp);
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
- goto done;
+ goto done_free_tx;
}
- if (qp->s_flags & (HFI1_S_WAIT_RNR | HFI1_S_WAIT_ACK))
+ if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
goto bail;
if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
- qp->s_flags |= HFI1_S_WAIT_PSN;
+ qp->s_flags |= RVT_S_WAIT_PSN;
goto bail;
}
qp->s_sending_psn = qp->s_psn;
@@ -322,10 +449,10 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
}
/* Send a request. */
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) {
default:
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/*
* Resend an old request or start a new one.
@@ -347,11 +474,11 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
*/
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
qp->s_num_rd_atomic) {
- qp->s_flags |= HFI1_S_WAIT_FENCE;
+ qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
- wqe->psn = qp->s_next_psn;
newreq = 1;
+ qp->s_psn = wqe->psn;
}
/*
* Note that we have to be careful not to modify the
@@ -365,21 +492,19 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+ qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
}
- if (wqe->wr.opcode == IB_WR_SEND)
+ if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_ONLY);
- else {
+ } else {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -393,14 +518,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
break;
case IB_WR_RDMA_WRITE:
- if (newreq && !(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+ if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+ qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
ohdr->u.rc.reth.vaddr =
@@ -409,16 +534,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
qp->s_state = OP(RDMA_WRITE_ONLY);
- else {
+ } else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
@@ -440,19 +563,12 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
if (newreq) {
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
- qp->s_flags |= HFI1_S_WAIT_RDMAR;
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
- if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- /*
- * Adjust s_next_psn to count the
- * expected number of responses.
- */
- if (len > pmtu)
- qp->s_next_psn += (len - 1) / pmtu;
- wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->rdma_wr.remote_addr);
@@ -477,13 +593,12 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
if (newreq) {
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
- qp->s_flags |= HFI1_S_WAIT_RDMAR;
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
- if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- wqe->lpsn = wqe->psn;
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
@@ -526,11 +641,8 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
}
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1;
- else {
+ else
qp->s_psn++;
- if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
- }
break;
case OP(RDMA_READ_RESPONSE_FIRST):
@@ -550,8 +662,6 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
- if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -559,9 +669,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
- if (wqe->wr.opcode == IB_WR_SEND)
+ if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_LAST);
- else {
+ } else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -592,8 +702,6 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
- if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -601,9 +709,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
qp->s_state = OP(RDMA_WRITE_LAST);
- else {
+ } else {
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -648,13 +756,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
delta = delta_psn(bth2, wqe->psn);
if (delta && delta % HFI1_PSN_CREDIT == 0)
bth2 |= IB_BTH_REQ_ACK;
- if (qp->s_flags & HFI1_S_SEND_ONE) {
- qp->s_flags &= ~HFI1_S_SEND_ONE;
- qp->s_flags |= HFI1_S_WAIT_ACK;
+ if (qp->s_flags & RVT_S_SEND_ONE) {
+ qp->s_flags &= ~RVT_S_SEND_ONE;
+ qp->s_flags |= RVT_S_WAIT_ACK;
bth2 |= IB_BTH_REQ_ACK;
}
qp->s_len -= len;
qp->s_hdrwords = hwords;
+ ps->s_txreq->sde = priv->s_sde;
qp->s_cur_sge = ss;
qp->s_cur_size = len;
hfi1_make_ruc_header(
@@ -662,16 +771,25 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
ohdr,
bth0 | (qp->s_state << 24),
bth2,
- middle);
-done:
- ret = 1;
- goto unlock;
+ middle,
+ ps);
+ /* pbc */
+ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+ return 1;
+
+done_free_tx:
+ hfi1_put_txreq(ps->s_txreq);
+ ps->s_txreq = NULL;
+ return 1;
bail:
- qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- return ret;
+ hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+ ps->s_txreq = NULL;
+ qp->s_flags &= ~RVT_S_BUSY;
+ qp->s_hdrwords = 0;
+ return 0;
}
/**
@@ -682,7 +800,7 @@ unlock:
* Note that RDMA reads and atomics are handled in the
* send side QP state and tasklet.
*/
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -700,7 +818,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
unsigned long flags;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
- if (qp->s_flags & HFI1_S_RESP_PENDING)
+ if (qp->s_flags & RVT_S_RESP_PENDING)
goto queue_ack;
/* Ensure s_rdma_ack_cnt changes are committed */
@@ -763,7 +881,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
goto queue_ack;
}
- trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+ trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
/* write the pbc and data */
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -771,13 +889,13 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
return;
queue_ack:
- this_cpu_inc(*ibp->rc_qacks);
+ this_cpu_inc(*ibp->rvp.rc_qacks);
spin_lock_irqsave(&qp->s_lock, flags);
- qp->s_flags |= HFI1_S_ACK_PENDING | HFI1_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
if (is_fecn)
- qp->s_flags |= HFI1_S_ECN;
+ qp->s_flags |= RVT_S_ECN;
/* Schedule the send tasklet. */
hfi1_schedule_send(qp);
@@ -793,10 +911,10 @@ queue_ack:
* for the given QP.
* Called at interrupt level with the QP s_lock held.
*/
-static void reset_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
{
u32 n = qp->s_acked;
- struct hfi1_swqe *wqe = get_swqe_ptr(qp, n);
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
qp->s_cur = n;
@@ -819,7 +937,7 @@ static void reset_psn(struct hfi1_qp *qp, u32 psn)
n = 0;
if (n == qp->s_tail)
break;
- wqe = get_swqe_ptr(qp, n);
+ wqe = rvt_get_swqe_ptr(qp, n);
diff = cmp_psn(psn, wqe->psn);
if (diff < 0)
break;
@@ -865,23 +983,23 @@ static void reset_psn(struct hfi1_qp *qp, u32 psn)
done:
qp->s_psn = psn;
/*
- * Set HFI1_S_WAIT_PSN as rc_complete() may start the timer
+ * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
* asynchronously before the send tasklet can get scheduled.
* Doing it in hfi1_make_rc_req() is too late.
*/
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
(cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
- qp->s_flags |= HFI1_S_WAIT_PSN;
- qp->s_flags &= ~HFI1_S_AHG_VALID;
+ qp->s_flags |= RVT_S_WAIT_PSN;
+ qp->s_flags &= ~RVT_S_AHG_VALID;
}
/*
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
+static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
- struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
if (qp->s_retry == 0) {
@@ -890,42 +1008,44 @@ static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
- hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
- } else /* need to handle delayed completion */
+ } else { /* need to handle delayed completion */
return;
- } else
+ }
+ } else {
qp->s_retry--;
+ }
ibp = to_iport(qp->ibqp.device, qp->port_num);
if (wqe->wr.opcode == IB_WR_RDMA_READ)
- ibp->n_rc_resends++;
+ ibp->rvp.n_rc_resends++;
else
- ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+ ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
- qp->s_flags &= ~(HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR |
- HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_PSN |
- HFI1_S_WAIT_ACK);
+ qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+ RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+ RVT_S_WAIT_ACK);
if (wait)
- qp->s_flags |= HFI1_S_SEND_ONE;
+ qp->s_flags |= RVT_S_SEND_ONE;
reset_psn(qp, psn);
}
/*
* This is called from s_timer for missing responses.
*/
-static void rc_timeout(unsigned long arg)
+void hfi1_rc_timeout(unsigned long arg)
{
- struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
struct hfi1_ibport *ibp;
unsigned long flags;
spin_lock_irqsave(&qp->r_lock, flags);
spin_lock(&qp->s_lock);
- if (qp->s_flags & HFI1_S_TIMER) {
+ if (qp->s_flags & RVT_S_TIMER) {
ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->n_rc_timeouts++;
- qp->s_flags &= ~HFI1_S_TIMER;
+ ibp->rvp.n_rc_timeouts++;
+ qp->s_flags &= ~RVT_S_TIMER;
del_timer(&qp->s_timer);
trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1);
restart_rc(qp, qp->s_last_psn + 1, 1);
@@ -940,15 +1060,12 @@ static void rc_timeout(unsigned long arg)
*/
void hfi1_rc_rnr_retry(unsigned long arg)
{
- struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
unsigned long flags;
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & HFI1_S_WAIT_RNR) {
- qp->s_flags &= ~HFI1_S_WAIT_RNR;
- del_timer(&qp->s_timer);
- hfi1_schedule_send(qp);
- }
+ hfi1_stop_rnr_timer(qp);
+ hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -956,14 +1073,14 @@ void hfi1_rc_rnr_retry(unsigned long arg)
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
*/
-static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
{
- struct hfi1_swqe *wqe;
+ struct rvt_swqe *wqe;
u32 n = qp->s_last;
/* Find the work request corresponding to the given PSN. */
for (;;) {
- wqe = get_swqe_ptr(qp, n);
+ wqe = rvt_get_swqe_ptr(qp, n);
if (cmp_psn(psn, wqe->lpsn) <= 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_sending_psn = wqe->lpsn + 1;
@@ -981,16 +1098,16 @@ static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
{
struct hfi1_other_headers *ohdr;
- struct hfi1_swqe *wqe;
+ struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
/* Find out where the BTH is */
@@ -1016,22 +1133,30 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
*/
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
!(qp->s_flags &
- (HFI1_S_TIMER | HFI1_S_WAIT_RNR | HFI1_S_WAIT_PSN)) &&
- (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
- start_timer(qp);
+ (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+ (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ hfi1_add_retry_timer(qp);
while (qp->s_last != qp->s_acked) {
- wqe = get_swqe_ptr(qp, qp->s_last);
+ u32 s_last;
+
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
+ s_last = qp->s_last;
+ if (++s_last >= qp->s_size)
+ s_last = 0;
+ qp->s_last = s_last;
+ /* see post_send() */
+ barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
- struct hfi1_sge *sge = &wqe->sg_list[i];
+ struct rvt_sge *sge = &wqe->sg_list[i];
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
@@ -1039,26 +1164,24 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
- hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
}
/*
* If we were waiting for sends to complete before re-sending,
* and they are now complete, restart sending.
*/
trace_hfi1_rc_sendcomplete(qp, psn);
- if (qp->s_flags & HFI1_S_WAIT_PSN &&
+ if (qp->s_flags & RVT_S_WAIT_PSN &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
- qp->s_flags &= ~HFI1_S_WAIT_PSN;
+ qp->s_flags &= ~RVT_S_WAIT_PSN;
qp->s_sending_psn = qp->s_psn;
qp->s_sending_hpsn = qp->s_psn - 1;
hfi1_schedule_send(qp);
}
}
-static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
{
qp->s_last_psn = psn;
}
@@ -1068,9 +1191,9 @@ static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
* This is similar to hfi1_send_complete but has to check to be sure
* that the SGEs are not being referenced if the SWQE is being resent.
*/
-static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
- struct hfi1_swqe *wqe,
- struct hfi1_ibport *ibp)
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ struct hfi1_ibport *ibp)
{
struct ib_wc wc;
unsigned i;
@@ -1082,13 +1205,21 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
*/
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+ u32 s_last;
+
for (i = 0; i < wqe->wr.num_sge; i++) {
- struct hfi1_sge *sge = &wqe->sg_list[i];
+ struct rvt_sge *sge = &wqe->sg_list[i];
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
+ s_last = qp->s_last;
+ if (++s_last >= qp->s_size)
+ s_last = 0;
+ qp->s_last = s_last;
+ /* see post_send() */
+ barrier();
/* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
@@ -1096,14 +1227,12 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
- hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- this_cpu_inc(*ibp->rc_delayed_comp);
+ this_cpu_inc(*ibp->rvp.rc_delayed_comp);
/*
* If send progress not running attempt to progress
* SDMA queue.
@@ -1131,7 +1260,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
qp->s_acked = qp->s_cur;
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
if (qp->s_acked != qp->s_tail) {
qp->s_state = OP(SEND_LAST);
qp->s_psn = wqe->psn;
@@ -1141,7 +1270,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
qp->s_acked = 0;
if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
qp->s_draining = 0;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
}
return wqe;
}
@@ -1157,21 +1286,16 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
* May be called at interrupt level, with the QP s_lock held.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
-static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u64 val, struct hfi1_ctxtdata *rcd)
{
struct hfi1_ibport *ibp;
enum ib_wc_status status;
- struct hfi1_swqe *wqe;
+ struct rvt_swqe *wqe;
int ret = 0;
u32 ack_psn;
int diff;
-
- /* Remove QP from retry timer */
- if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
- qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
+ unsigned long to;
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
@@ -1182,7 +1306,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
ack_psn = psn;
if (aeth >> 29)
ack_psn--;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
ibp = to_iport(qp->ibqp.device, qp->port_num);
/*
@@ -1200,7 +1324,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
diff == 0) {
ret = 1;
- goto bail;
+ goto bail_stop;
}
/*
* If this request is a RDMA read or atomic, and the ACK is
@@ -1217,11 +1341,11 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
/* Retry this request. */
- if (!(qp->r_flags & HFI1_R_RDMAR_SEQ)) {
- qp->r_flags |= HFI1_R_RDMAR_SEQ;
+ if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= HFI1_R_RSP_SEND;
+ qp->r_flags |= RVT_R_RSP_SEND;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
@@ -1231,7 +1355,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
* No need to process the ACK/NAK since we are
* restarting an earlier request.
*/
- goto bail;
+ goto bail_stop;
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
@@ -1244,14 +1368,14 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
qp->s_num_rd_atomic--;
/* Restart sending task if fence is complete */
- if ((qp->s_flags & HFI1_S_WAIT_FENCE) &&
+ if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
!qp->s_num_rd_atomic) {
- qp->s_flags &= ~(HFI1_S_WAIT_FENCE |
- HFI1_S_WAIT_ACK);
+ qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+ RVT_S_WAIT_ACK);
hfi1_schedule_send(qp);
- } else if (qp->s_flags & HFI1_S_WAIT_RDMAR) {
- qp->s_flags &= ~(HFI1_S_WAIT_RDMAR |
- HFI1_S_WAIT_ACK);
+ } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+ qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+ RVT_S_WAIT_ACK);
hfi1_schedule_send(qp);
}
}
@@ -1262,40 +1386,43 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
switch (aeth >> 29) {
case 0: /* ACK */
- this_cpu_inc(*ibp->rc_acks);
+ this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
/*
* We are expecting more ACKs so
- * reset the re-transmit timer.
+ * mod the retry timer.
*/
- start_timer(qp);
+ hfi1_mod_retry_timer(qp);
/*
* We can stop re-sending the earlier packets and
* continue with the next packet the receiver wants.
*/
if (cmp_psn(qp->s_psn, psn) <= 0)
reset_psn(qp, psn + 1);
- } else if (cmp_psn(qp->s_psn, psn) <= 0) {
- qp->s_state = OP(SEND_LAST);
- qp->s_psn = psn + 1;
+ } else {
+ /* No more acks - kill all timers */
+ hfi1_stop_rc_timers(qp);
+ if (cmp_psn(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
+ }
}
- if (qp->s_flags & HFI1_S_WAIT_ACK) {
- qp->s_flags &= ~HFI1_S_WAIT_ACK;
+ if (qp->s_flags & RVT_S_WAIT_ACK) {
+ qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
}
hfi1_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, psn);
- ret = 1;
- goto bail;
+ return 1;
case 1: /* RNR NAK */
- ibp->n_rnr_naks++;
+ ibp->rvp.n_rnr_naks++;
if (qp->s_acked == qp->s_tail)
- goto bail;
- if (qp->s_flags & HFI1_S_WAIT_RNR)
- goto bail;
+ goto bail_stop;
+ if (qp->s_flags & RVT_S_WAIT_RNR)
+ goto bail_stop;
if (qp->s_rnr_retry == 0) {
status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
@@ -1306,28 +1433,27 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+ ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
reset_psn(qp, psn);
- qp->s_flags &= ~(HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_ACK);
- qp->s_flags |= HFI1_S_WAIT_RNR;
- qp->s_timer.function = hfi1_rc_rnr_retry;
- qp->s_timer.expires = jiffies + usecs_to_jiffies(
+ qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+ hfi1_stop_rc_timers(qp);
+ to =
ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK]);
- add_timer(&qp->s_timer);
- goto bail;
+ HFI1_AETH_CREDIT_MASK];
+ hfi1_add_rnr_timer(qp, to);
+ return 0;
case 3: /* NAK */
if (qp->s_acked == qp->s_tail)
- goto bail;
+ goto bail_stop;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
HFI1_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
- ibp->n_seq_naks++;
+ ibp->rvp.n_seq_naks++;
/*
* Back up to the responder's expected PSN.
* Note that we might get a NAK in the middle of an
@@ -1340,21 +1466,21 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
case 1: /* Invalid Request */
status = IB_WC_REM_INV_REQ_ERR;
- ibp->n_other_naks++;
+ ibp->rvp.n_other_naks++;
goto class_b;
case 2: /* Remote Access Error */
status = IB_WC_REM_ACCESS_ERR;
- ibp->n_other_naks++;
+ ibp->rvp.n_other_naks++;
goto class_b;
case 3: /* Remote Operation Error */
status = IB_WC_REM_OP_ERR;
- ibp->n_other_naks++;
+ ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, status);
- hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1364,15 +1490,16 @@ class_b:
}
qp->s_retry = qp->s_retry_cnt;
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
- goto bail;
+ goto bail_stop;
default: /* 2: reserved */
reserved:
/* Ignore reserved NAK codes. */
- goto bail;
+ goto bail_stop;
}
-
-bail:
+ return ret;
+bail_stop:
+ hfi1_stop_rc_timers(qp);
return ret;
}
@@ -1380,18 +1507,15 @@ bail:
* We have seen an out of sequence RDMA read middle or last packet.
* This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
*/
-static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
struct hfi1_ctxtdata *rcd)
{
- struct hfi1_swqe *wqe;
+ struct rvt_swqe *wqe;
/* Remove QP from retry timer */
- if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
- qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
+ hfi1_stop_rc_timers(qp);
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
while (cmp_psn(psn, wqe->lpsn) > 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1401,11 +1525,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
wqe = do_rc_completion(qp, wqe, ibp);
}
- ibp->n_rdma_seq++;
- qp->r_flags |= HFI1_R_RDMAR_SEQ;
+ ibp->rvp.n_rdma_seq++;
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= HFI1_R_RSP_SEND;
+ qp->r_flags |= RVT_R_RSP_SEND;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
@@ -1429,11 +1553,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
*/
static void rc_rcv_resp(struct hfi1_ibport *ibp,
struct hfi1_other_headers *ohdr,
- void *data, u32 tlen, struct hfi1_qp *qp,
+ void *data, u32 tlen, struct rvt_qp *qp,
u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
struct hfi1_ctxtdata *rcd)
{
- struct hfi1_swqe *wqe;
+ struct rvt_swqe *wqe;
enum ib_wc_status status;
unsigned long flags;
int diff;
@@ -1446,7 +1570,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
trace_hfi1_rc_ack(qp, psn);
/* Ignore invalid responses. */
- if (cmp_psn(psn, qp->s_next_psn) >= 0)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1465,15 +1590,15 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
* Skip everything other than the PSN we expect, if we are waiting
* for a reply to a restarted RDMA read or atomic op.
*/
- if (qp->r_flags & HFI1_R_RDMAR_SEQ) {
+ if (qp->r_flags & RVT_R_RDMAR_SEQ) {
if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
goto ack_done;
- qp->r_flags &= ~HFI1_R_RDMAR_SEQ;
+ qp->r_flags &= ~RVT_R_RDMAR_SEQ;
}
if (unlikely(qp->s_acked == qp->s_tail))
goto ack_done;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
status = IB_WC_SUCCESS;
switch (opcode) {
@@ -1484,14 +1609,15 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
__be32 *p = ohdr->u.at.atomic_ack_eth;
- val = ((u64) be32_to_cpu(p[0]) << 32) |
+ val = ((u64)be32_to_cpu(p[0]) << 32) |
be32_to_cpu(p[1]);
- } else
+ } else {
val = 0;
+ }
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/*
@@ -1519,10 +1645,10 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= HFI1_S_TIMER;
+ qp->s_flags |= RVT_S_TIMER;
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
- if (qp->s_flags & HFI1_S_WAIT_ACK) {
- qp->s_flags &= ~HFI1_S_WAIT_ACK;
+ if (qp->s_flags & RVT_S_WAIT_ACK) {
+ qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
}
@@ -1536,7 +1662,7 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+ hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1556,7 +1682,7 @@ read_middle:
* have to be careful to copy the data to the right
* location.
*/
- wqe = get_swqe_ptr(qp, qp->s_acked);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
wqe, psn, pmtu);
goto read_last;
@@ -1580,9 +1706,9 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+ hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
WARN_ON(qp->s_rdma_read_sge.num_sge);
- (void) do_rc_ack(qp, aeth, psn,
+ (void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
goto ack_done;
}
@@ -1600,7 +1726,7 @@ ack_len_err:
ack_err:
if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, status);
- hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1609,22 +1735,24 @@ bail:
}
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
- struct hfi1_qp *qp)
+ struct rvt_qp *qp)
{
if (list_empty(&qp->rspwait)) {
- qp->r_flags |= HFI1_R_RSP_DEFERED_ACK;
+ qp->r_flags |= RVT_R_RSP_NAK;
atomic_inc(&qp->refcount);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
-static inline void rc_cancel_ack(struct hfi1_qp *qp)
+static inline void rc_cancel_ack(struct rvt_qp *qp)
{
- qp->r_adefered = 0;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->r_adefered = 0;
if (list_empty(&qp->rspwait))
return;
list_del_init(&qp->rspwait);
- qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+ qp->r_flags &= ~RVT_R_RSP_NAK;
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
@@ -1645,11 +1773,11 @@ static inline void rc_cancel_ack(struct hfi1_qp *qp)
* schedule a response to be sent.
*/
static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
- struct hfi1_qp *qp, u32 opcode, u32 psn, int diff,
- struct hfi1_ctxtdata *rcd)
+ struct rvt_qp *qp, u32 opcode, u32 psn,
+ int diff, struct hfi1_ctxtdata *rcd)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_ack_entry *e;
+ struct rvt_ack_entry *e;
unsigned long flags;
u8 i, prev;
int old_req;
@@ -1662,7 +1790,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
* Don't queue the NAK if we already sent one.
*/
if (!qp->r_nak_state) {
- ibp->n_rc_seqnak++;
+ ibp->rvp.n_rc_seqnak++;
qp->r_nak_state = IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
qp->r_ack_psn = qp->r_psn;
@@ -1694,7 +1822,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
*/
e = NULL;
old_req = 1;
- ibp->n_rc_dupreq++;
+ ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1747,7 +1875,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
if (unlikely(offset + len != e->rdma_sge.sge_length))
goto unlock_done;
if (e->rdma_sge.mr) {
- hfi1_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
if (len != 0) {
@@ -1755,8 +1883,8 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
- ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
- IB_ACCESS_REMOTE_READ);
+ ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+ IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
goto unlock_done;
} else {
@@ -1778,7 +1906,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
* or the send tasklet is already backed up to send an
* earlier entry, we can ignore this request.
*/
- if (!e || e->opcode != (u8) opcode || old_req)
+ if (!e || e->opcode != (u8)opcode || old_req)
goto unlock_done;
qp->s_tail_ack_queue = prev;
break;
@@ -1810,7 +1938,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
break;
}
qp->s_ack_state = OP(ACKNOWLEDGE);
- qp->s_flags |= HFI1_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_RESP_PENDING;
qp->r_nak_state = 0;
hfi1_schedule_send(qp);
@@ -1823,13 +1951,13 @@ send_ack:
return 0;
}
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
{
unsigned long flags;
int lastwqe;
spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = hfi1_error_qp(qp, err);
+ lastwqe = rvt_error_qp(qp, err);
spin_unlock_irqrestore(&qp->s_lock, flags);
if (lastwqe) {
@@ -1842,7 +1970,7 @@ void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
}
}
-static inline void update_ack_queue(struct hfi1_qp *qp, unsigned n)
+static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
@@ -1864,14 +1992,14 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
spin_lock_irqsave(&ppd->cc_log_lock, flags);
- ppd->threshold_cong_event_map[sl/8] |= 1 << (sl % 8);
+ ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
ppd->threshold_event_counter++;
cc_event = &ppd->cc_events[ppd->cc_log_idx++];
if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
ppd->cc_log_idx = 0;
- cc_event->lqpn = lqpn & HFI1_QPN_MASK;
- cc_event->rqpn = rqpn & HFI1_QPN_MASK;
+ cc_event->lqpn = lqpn & RVT_QPN_MASK;
+ cc_event->rqpn = rqpn & RVT_QPN_MASK;
cc_event->sl = sl;
cc_event->svc_type = svc_type;
cc_event->rlid = rlid;
@@ -1897,7 +2025,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
cc_state = get_cc_state(ppd);
- if (cc_state == NULL)
+ if (!cc_state)
return;
/*
@@ -1957,7 +2085,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
- struct hfi1_qp *qp = packet->qp;
+ struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_other_headers *ohdr = packet->ohdr;
@@ -1972,6 +2100,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
unsigned long flags;
u32 bth1;
int ret, is_fecn = 0;
+ int copy_last = 0;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
@@ -2054,13 +2183,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
break;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
qp_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- ret = hfi1_get_rwqe(qp, 0);
+ ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2076,12 +2205,12 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
- ret = hfi1_get_rwqe(qp, 1);
+ ret = hfi1_rvt_get_rwqe(qp, 1);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2090,7 +2219,7 @@ send_middle:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
- ret = hfi1_get_rwqe(qp, 0);
+ ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2104,8 +2233,10 @@ send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
- case OP(SEND_LAST):
case OP(RDMA_WRITE_LAST):
+ copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+ /* fall through */
+ case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
wc.ex.imm_data = 0;
@@ -2121,10 +2252,10 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
- hfi1_put_ss(&qp->r_sge);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+ rvt_put_ss(&qp->r_sge);
qp->r_msn++;
- if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -2154,12 +2285,14 @@ send_last:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- (bth0 & IB_BTH_SOLICITED) != 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ (bth0 & IB_BTH_SOLICITED) != 0);
break;
- case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
+ copy_last = 1;
+ /* fall through */
+ case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto nack_inv;
@@ -2174,8 +2307,8 @@ send_last:
int ok;
/* Check rkey & NAK */
- ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
- rkey, IB_ACCESS_REMOTE_WRITE);
+ ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+ rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto nack_acc;
qp->r_sge.num_sge = 1;
@@ -2190,7 +2323,7 @@ send_last:
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
- ret = hfi1_get_rwqe(qp, 1);
+ ret = hfi1_rvt_get_rwqe(qp, 1);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2200,7 +2333,7 @@ send_last:
goto send_last;
case OP(RDMA_READ_REQUEST): {
- struct hfi1_ack_entry *e;
+ struct rvt_ack_entry *e;
u32 len;
u8 next;
@@ -2218,7 +2351,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- hfi1_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
reth = &ohdr->u.rc.reth;
@@ -2229,8 +2362,8 @@ send_last:
int ok;
/* Check rkey & NAK */
- ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr,
- rkey, IB_ACCESS_REMOTE_READ);
+ ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
goto nack_acc_unlck;
/*
@@ -2261,7 +2394,7 @@ send_last:
qp->r_head_ack_queue = next;
/* Schedule the send tasklet. */
- qp->s_flags |= HFI1_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2273,7 +2406,7 @@ send_last:
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
- struct hfi1_ack_entry *e;
+ struct rvt_ack_entry *e;
u64 vaddr;
atomic64_t *maddr;
u64 sdata;
@@ -2293,29 +2426,29 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- hfi1_put_mr(e->rdma_sge.mr);
+ rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
- if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- vaddr, rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ vaddr, rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
- maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
- (u64) atomic64_add_return(sdata, maddr) - sdata :
- (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ (u64)atomic64_add_return(sdata, maddr) - sdata :
+ (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
be64_to_cpu(ateth->compare_data),
sdata);
- hfi1_put_mr(qp->r_sge.sge.mr);
+ rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
e->opcode = opcode;
e->sent = 0;
@@ -2328,7 +2461,7 @@ send_last:
qp->r_head_ack_queue = next;
/* Schedule the send tasklet. */
- qp->s_flags |= HFI1_S_RESP_PENDING;
+ qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2347,11 +2480,13 @@ send_last:
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
if (psn & IB_BTH_REQ_ACK) {
+ struct hfi1_qp_priv *priv = qp->priv;
+
if (packet->numpkt == 0) {
rc_cancel_ack(qp);
goto send_ack;
}
- if (qp->r_adefered >= HFI1_PSN_CREDIT) {
+ if (priv->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
@@ -2359,13 +2494,13 @@ send_last:
rc_cancel_ack(qp);
goto send_ack;
}
- qp->r_adefered++;
+ priv->r_adefered++;
rc_defered_ack(rcd, qp);
}
return;
rnr_nak:
- qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
qp->r_ack_psn = qp->r_psn;
/* Queue RNR NAK for later */
rc_defered_ack(rcd, qp);
@@ -2403,7 +2538,7 @@ void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
struct hfi1_ib_header *hdr,
u32 rcv_flags,
- struct hfi1_qp *qp)
+ struct rvt_qp *qp)
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
struct hfi1_other_headers *ohdr;
@@ -2428,7 +2563,7 @@ void hfi1_rc_hdrerr(
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff = delta_psn(psn, qp->r_psn);
if (!qp->r_nak_state && diff >= 0) {
- ibp->n_rc_seqnak++;
+ ibp->rvp.n_rc_seqnak++;
qp->r_nak_state = IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
qp->r_ack_psn = qp->r_psn;
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 4a91975b68d7..08813cdbd475 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -53,7 +50,8 @@
#include "hfi.h"
#include "mad.h"
#include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
+#include "trace.h"
/*
* Convert the AETH RNR timeout code into the number of microseconds.
@@ -97,16 +95,16 @@ const u32 ib_hfi1_rnr_table[32] = {
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
-static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
+static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
int i, j, ret;
struct ib_wc wc;
- struct hfi1_lkey_table *rkt;
- struct hfi1_pd *pd;
- struct hfi1_sge_state *ss;
+ struct rvt_lkey_table *rkt;
+ struct rvt_pd *pd;
+ struct rvt_sge_state *ss;
- rkt = &to_idev(qp->ibqp.device)->lk_table;
- pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+ rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+ pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
ss = &qp->r_sge;
ss->sg_list = qp->r_sg_list;
qp->r_len = 0;
@@ -114,8 +112,8 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
- if (!hfi1_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
j++;
@@ -127,9 +125,9 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
bad_lkey:
while (j) {
- struct hfi1_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+ struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
ss->num_sge = 0;
memset(&wc, 0, sizeof(wc));
@@ -138,14 +136,14 @@ bad_lkey:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
/* Signal solicited completion event. */
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
bail:
return ret;
}
/**
- * hfi1_get_rwqe - copy the next RWQE into the QP's RWQE
+ * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
* @wr_id_only: update qp->r_wr_id only, not qp->r_sge
*
@@ -154,19 +152,19 @@ bail:
*
* Can be called from interrupt level.
*/
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
unsigned long flags;
- struct hfi1_rq *rq;
- struct hfi1_rwq *wq;
- struct hfi1_srq *srq;
- struct hfi1_rwqe *wqe;
+ struct rvt_rq *rq;
+ struct rvt_rwq *wq;
+ struct rvt_srq *srq;
+ struct rvt_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
u32 tail;
int ret;
if (qp->ibqp.srq) {
- srq = to_isrq(qp->ibqp.srq);
+ srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
@@ -176,7 +174,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
@@ -192,7 +190,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
}
/* Make sure entry is read after head index is read. */
smp_rmb();
- wqe = get_rwqe_ptr(rq, tail);
+ wqe = rvt_get_rwqe_ptr(rq, tail);
/*
* Even though we update the tail index in memory, the verbs
* consumer is not supposed to post more entries until a
@@ -208,7 +206,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
qp->r_wr_id = wqe->wr_id;
ret = 1;
- set_bit(HFI1_R_WRID_VALID, &qp->r_aflags);
+ set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
@@ -265,7 +263,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
- int has_grh, struct hfi1_qp *qp, u32 bth0)
+ int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
unsigned long flags;
@@ -279,11 +277,13 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
goto err;
guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ guid))
goto err;
- if (!gid_ok(&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ if (!gid_ok(
+ &hdr->u.l.grh.sgid,
+ qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+ qp->alt_ah_attr.grh.dgid.global.interface_id))
goto err;
}
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -312,11 +312,13 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
goto err;
guid = get_sguid(ibp,
qp->remote_ah_attr.grh.sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ guid))
goto err;
- if (!gid_ok(&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ if (!gid_ok(
+ &hdr->u.l.grh.sgid,
+ qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+ qp->remote_ah_attr.grh.dgid.global.interface_id))
goto err;
}
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -355,12 +357,12 @@ err:
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
-static void ruc_loopback(struct hfi1_qp *sqp)
+static void ruc_loopback(struct rvt_qp *sqp)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct hfi1_qp *qp;
- struct hfi1_swqe *wqe;
- struct hfi1_sge *sge;
+ struct rvt_qp *qp;
+ struct rvt_swqe *wqe;
+ struct rvt_sge *sge;
unsigned long flags;
struct ib_wc wc;
u64 sdata;
@@ -368,6 +370,8 @@ static void ruc_loopback(struct hfi1_qp *sqp)
enum ib_wc_status send_status;
int release;
int ret;
+ int copy_last = 0;
+ u32 to;
rcu_read_lock();
@@ -375,25 +379,27 @@ static void ruc_loopback(struct hfi1_qp *sqp)
* Note that we check the responder QP state after
* checking the requester's state.
*/
- qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn);
+ qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+ sqp->remote_qpn);
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT)) ||
- !(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+ if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+ !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
goto unlock;
- sqp->s_flags |= HFI1_S_BUSY;
+ sqp->s_flags |= RVT_S_BUSY;
again:
- if (sqp->s_last == sqp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
goto clr_busy;
- wqe = get_swqe_ptr(sqp, sqp->s_last);
+ wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work request. */
- if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_hfi1_state_ops[sqp->state] & HFI1_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
@@ -411,9 +417,9 @@ again:
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) ||
+ if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
@@ -439,7 +445,7 @@ again:
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
- ret = hfi1_get_rwqe(qp, 0);
+ ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto op_err;
if (!ret)
@@ -451,21 +457,24 @@ again:
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = hfi1_get_rwqe(qp, 1);
+ ret = hfi1_rvt_get_rwqe(qp, 1);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
- /* FALLTHROUGH */
+ /* skip copy_last set and qp_access_flags recheck */
+ goto do_write;
case IB_WR_RDMA_WRITE:
+ copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
+do_write:
if (wqe->length == 0)
break;
- if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
+ IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
qp->r_sge.num_sge = 1;
@@ -475,10 +484,10 @@ again:
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
- if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
+ if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
+ IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = 0;
sqp->s_sge.sg_list = NULL;
@@ -493,20 +502,20 @@ again:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
- if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
- maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
sdata = wqe->atomic_wr.compare_add;
- *(u64 *) sqp->s_sge.sge.vaddr =
+ *(u64 *)sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64) atomic64_add_return(sdata, maddr) - sdata :
- (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ (u64)atomic64_add_return(sdata, maddr) - sdata :
+ (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
sdata, wqe->atomic_wr.swap);
- hfi1_put_mr(qp->r_sge.sge.mr);
+ rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -524,17 +533,17 @@ again:
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+ hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= HFI1_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -547,9 +556,9 @@ again:
sqp->s_len -= len;
}
if (release)
- hfi1_put_ss(&qp->r_sge);
+ rvt_put_ss(&qp->r_sge);
- if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -565,12 +574,12 @@ again:
wc.sl = qp->remote_ah_attr.sl;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->n_loop_pkts++;
+ ibp->rvp.n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
hfi1_send_complete(sqp, wqe, send_status);
@@ -580,7 +589,7 @@ rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
- ibp->n_rnr_naks++;
+ ibp->rvp.n_rnr_naks++;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
@@ -592,13 +601,10 @@ rnr_nak:
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
- sqp->s_flags |= HFI1_S_WAIT_RNR;
- sqp->s_timer.function = hfi1_rc_rnr_retry;
- sqp->s_timer.expires = jiffies +
- usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]);
- add_timer(&sqp->s_timer);
+ to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
+ hfi1_add_rnr_timer(sqp, to);
goto clr_busy;
op_err:
@@ -622,9 +628,9 @@ serr:
spin_lock_irqsave(&sqp->s_lock, flags);
hfi1_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+ int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
- sqp->s_flags &= ~HFI1_S_BUSY;
+ sqp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
@@ -637,7 +643,7 @@ serr:
goto done;
}
clr_busy:
- sqp->s_flags &= ~HFI1_S_BUSY;
+ sqp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
@@ -666,7 +672,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
hdr->next_hdr = IB_GRH_NEXT_HDR;
hdr->hop_limit = grh->hop_limit;
/* The SGID is 32-bit aligned. */
- hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
hdr->sgid.global.interface_id =
grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
ibp->guids[grh->sgid_index - 1] :
@@ -690,29 +696,31 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
* Subsequent middles use the copied entry, editing the
* PSN with 1 or 2 edits.
*/
-static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
+static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
- if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
clear_ahg(qp);
- if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
+ if (!(qp->s_flags & RVT_S_AHG_VALID)) {
/* first middle that needs copy */
if (qp->s_ahgidx < 0)
- qp->s_ahgidx = sdma_ahg_alloc(qp->s_sde);
+ qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
if (qp->s_ahgidx >= 0) {
qp->s_ahgpsn = npsn;
- qp->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
+ priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
/* save to protect a change in another thread */
- qp->s_hdr->sde = qp->s_sde;
- qp->s_hdr->ahgidx = qp->s_ahgidx;
- qp->s_flags |= HFI1_S_AHG_VALID;
+ priv->s_hdr->sde = priv->s_sde;
+ priv->s_hdr->ahgidx = qp->s_ahgidx;
+ qp->s_flags |= RVT_S_AHG_VALID;
}
} else {
/* subsequent middle after valid */
if (qp->s_ahgidx >= 0) {
- qp->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
- qp->s_hdr->ahgidx = qp->s_ahgidx;
- qp->s_hdr->ahgcount++;
- qp->s_hdr->ahgdesc[0] =
+ priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
+ priv->s_hdr->ahgidx = qp->s_ahgidx;
+ priv->s_hdr->ahgcount++;
+ priv->s_hdr->ahgdesc[0] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16((u16)npsn),
BTH2_OFFSET,
@@ -720,8 +728,8 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
16);
if ((npsn & 0xffff0000) !=
(qp->s_ahgpsn & 0xffff0000)) {
- qp->s_hdr->ahgcount++;
- qp->s_hdr->ahgdesc[1] =
+ priv->s_hdr->ahgcount++;
+ priv->s_hdr->ahgdesc[1] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16(
(u16)(npsn >> 16)),
@@ -733,10 +741,12 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
}
}
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
- u32 bth0, u32 bth2, int middle)
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+ u32 bth0, u32 bth2, int middle,
+ struct hfi1_pkt_state *ps)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ibport *ibp = ps->ibp;
u16 lrh0;
u32 nwords;
u32 extra_bytes;
@@ -747,13 +757,14 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ qp->s_hdrwords += hfi1_make_grh(ibp,
+ &ps->s_txreq->phdr.hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
middle = 0;
}
- lrh0 |= (qp->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
/*
* reset s_hdr/AHG fields
*
@@ -765,10 +776,10 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
* build_ahg() will modify as appropriate
* to use the AHG feature.
*/
- qp->s_hdr->tx_flags = 0;
- qp->s_hdr->ahgcount = 0;
- qp->s_hdr->ahgidx = 0;
- qp->s_hdr->sde = NULL;
+ priv->s_hdr->tx_flags = 0;
+ priv->s_hdr->ahgcount = 0;
+ priv->s_hdr->ahgidx = 0;
+ priv->s_hdr->sde = NULL;
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
else
@@ -776,19 +787,19 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
if (middle)
build_ahg(qp, bth2);
else
- qp->s_flags &= ~HFI1_S_AHG_VALID;
- qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr->ibh.lrh[2] =
+ qp->s_flags &= ~RVT_S_AHG_VALID;
+ ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+ ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- qp->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
qp->remote_ah_attr.src_path_bits);
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
bth1 = qp->remote_qpn;
- if (qp->s_flags & HFI1_S_ECN) {
- qp->s_flags &= ~HFI1_S_ECN;
+ if (qp->s_flags & RVT_S_ECN) {
+ qp->s_flags &= ~RVT_S_ECN;
/* we recently received a FECN, so return a BECN */
bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
}
@@ -799,6 +810,14 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+void _hfi1_do_send(struct work_struct *work)
+{
+ struct iowait *wait = container_of(work, struct iowait, iowork);
+ struct rvt_qp *qp = iowait_to_qp(wait);
+
+ hfi1_do_send(qp);
+}
+
/**
* hfi1_do_send - perform a send on a QP
* @work: contains a pointer to the QP
@@ -807,34 +826,45 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
* exhausted. Only allow one CPU to send a packet per QP (tasklet).
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct work_struct *work)
+void hfi1_do_send(struct rvt_qp *qp)
{
- struct iowait *wait = container_of(work, struct iowait, iowork);
- struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
struct hfi1_pkt_state ps;
- int (*make_req)(struct hfi1_qp *qp);
+ struct hfi1_qp_priv *priv = qp->priv;
+ int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
unsigned long flags;
unsigned long timeout;
+ unsigned long timeout_int;
+ int cpu;
ps.dev = to_idev(qp->ibqp.device);
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
- if ((qp->ibqp.qp_type == IB_QPT_RC ||
- qp->ibqp.qp_type == IB_QPT_UC) &&
- !loopback &&
- (qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
- ps.ppd->lid) {
- ruc_loopback(qp);
- return;
- }
-
- if (qp->ibqp.qp_type == IB_QPT_RC)
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+ ) - 1)) ==
+ ps.ppd->lid)) {
+ ruc_loopback(qp);
+ return;
+ }
make_req = hfi1_make_rc_req;
- else if (qp->ibqp.qp_type == IB_QPT_UC)
+ timeout_int = (qp->timeout_jiffies);
+ break;
+ case IB_QPT_UC:
+ if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+ ) - 1)) ==
+ ps.ppd->lid)) {
+ ruc_loopback(qp);
+ return;
+ }
make_req = hfi1_make_uc_req;
- else
+ timeout_int = SEND_RESCHED_TIMEOUT;
+ break;
+ default:
make_req = hfi1_make_ud_req;
+ timeout_int = SEND_RESCHED_TIMEOUT;
+ }
spin_lock_irqsave(&qp->s_lock, flags);
@@ -844,57 +874,83 @@ void hfi1_do_send(struct work_struct *work)
return;
}
- qp->s_flags |= HFI1_S_BUSY;
+ qp->s_flags |= RVT_S_BUSY;
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- timeout = jiffies + SEND_RESCHED_TIMEOUT;
+ timeout = jiffies + (timeout_int) / 8;
+ cpu = priv->s_sde ? priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+ /* insure a pre-built packet is handled */
+ ps.s_txreq = get_waiting_verbs_txreq(qp);
do {
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
/*
* If the packet cannot be sent now, return and
* the send tasklet will be woken up later.
*/
if (hfi1_verbs_send(qp, &ps))
- break;
+ return;
/* Record that s_hdr is empty. */
qp->s_hdrwords = 0;
+ /* allow other tasks to run */
+ if (unlikely(time_after(jiffies, timeout))) {
+ if (workqueue_congested(cpu,
+ ps.ppd->hfi1_wq)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock,
+ flags);
+ this_cpu_inc(
+ *ps.ppd->dd->send_schedule);
+ return;
+ }
+ if (!irqs_disabled()) {
+ cond_resched();
+ this_cpu_inc(
+ *ps.ppd->dd->send_schedule);
+ }
+ timeout = jiffies + (timeout_int) / 8;
+ }
+ spin_lock_irqsave(&qp->s_lock, flags);
}
+ } while (make_req(qp, &ps));
- /* allow other tasks to run */
- if (unlikely(time_after(jiffies, timeout))) {
- cond_resched();
- ps.ppd->dd->verbs_dev.n_send_schedule++;
- timeout = jiffies + SEND_RESCHED_TIMEOUT;
- }
- } while (make_req(qp));
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
/*
* This should be called with s_lock held.
*/
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
unsigned i;
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
+ last = qp->s_last;
+ old_last = last;
+ if (++last >= qp->s_size)
+ last = 0;
+ qp->s_last = last;
+ /* See post_send() */
+ barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
- struct hfi1_sge *sge = &wqe->sg_list[i];
+ struct rvt_sge *sge = &wqe->sg_list[i];
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+ atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
@@ -906,15 +962,10 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
wc.qp = &qp->ibqp;
if (status == IB_WC_SUCCESS)
wc.byte_len = wqe->length;
- hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
}
- last = qp->s_last;
- old_last = last;
- if (++last >= qp->s_size)
- last = 0;
- qp->s_last = last;
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c
index 9a15f1f32b45..abb8ebc1fcac 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/staging/rdma/hfi1/sdma.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -112,10 +109,10 @@ MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptor before interrupt");
| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
/* sdma_sendctrl operations */
-#define SDMA_SENDCTRL_OP_ENABLE (1U << 0)
-#define SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
-#define SDMA_SENDCTRL_OP_HALT (1U << 2)
-#define SDMA_SENDCTRL_OP_CLEANUP (1U << 3)
+#define SDMA_SENDCTRL_OP_ENABLE BIT(0)
+#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
+#define SDMA_SENDCTRL_OP_HALT BIT(2)
+#define SDMA_SENDCTRL_OP_CLEANUP BIT(3)
/* handle long defines */
#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
@@ -325,9 +322,9 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
if (lcnt++ > 500) {
/* timed out - bounce the link */
dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
- __func__, sde->this_idx, (u32)reg);
+ __func__, sde->this_idx, (u32)reg);
queue_work(dd->pport->hfi1_wq,
- &dd->pport->link_bounce_work);
+ &dd->pport->link_bounce_work);
break;
}
udelay(1);
@@ -361,6 +358,28 @@ static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
write_sde_csr(sde, SD(DESC_CNT), reg);
}
+static inline void complete_tx(struct sdma_engine *sde,
+ struct sdma_txreq *tx,
+ int res)
+{
+ /* protect against complete modifying */
+ struct iowait *wait = tx->wait;
+ callback_t complete = tx->complete;
+
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+ trace_hfi1_sdma_out_sn(sde, tx->sn);
+ if (WARN_ON_ONCE(sde->head_sn != tx->sn))
+ dd_dev_err(sde->dd, "expected %llu got %llu\n",
+ sde->head_sn, tx->sn);
+ sde->head_sn++;
+#endif
+ sdma_txclean(sde->dd, tx);
+ if (complete)
+ (*complete)(tx, res);
+ if (iowait_sdma_dec(wait) && wait)
+ iowait_drain_wakeup(wait);
+}
+
/*
* Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
*
@@ -395,27 +414,8 @@ static void sdma_flush(struct sdma_engine *sde)
}
spin_unlock_irqrestore(&sde->flushlist_lock, flags);
/* flush from flush list */
- list_for_each_entry_safe(txp, txp_next, &flushlist, list) {
- int drained = 0;
- /* protect against complete modifying */
- struct iowait *wait = txp->wait;
-
- list_del_init(&txp->list);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
- trace_hfi1_sdma_out_sn(sde, txp->sn);
- if (WARN_ON_ONCE(sde->head_sn != txp->sn))
- dd_dev_err(sde->dd, "expected %llu got %llu\n",
- sde->head_sn, txp->sn);
- sde->head_sn++;
-#endif
- sdma_txclean(sde->dd, txp);
- if (wait)
- drained = atomic_dec_and_test(&wait->sdma_busy);
- if (txp->complete)
- (*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained);
- if (wait && drained)
- iowait_drain_wakeup(wait);
- }
+ list_for_each_entry_safe(txp, txp_next, &flushlist, list)
+ complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
}
/*
@@ -455,8 +455,8 @@ static void sdma_err_halt_wait(struct work_struct *work)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(sde->dd,
- "SDMA engine %d - timeout waiting for engine to halt\n",
- sde->this_idx);
+ "SDMA engine %d - timeout waiting for engine to halt\n",
+ sde->this_idx);
/*
* Continue anyway. This could happen if there was
* an uncorrectable error in the wrong spot.
@@ -472,7 +472,6 @@ static void sdma_err_halt_wait(struct work_struct *work)
static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
{
if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
-
unsigned index;
struct hfi1_devdata *dd = sde->dd;
@@ -531,7 +530,7 @@ static void sdma_err_progress_check(unsigned long data)
static void sdma_hw_clean_up_task(unsigned long opaque)
{
- struct sdma_engine *sde = (struct sdma_engine *) opaque;
+ struct sdma_engine *sde = (struct sdma_engine *)opaque;
u64 statuscsr;
while (1) {
@@ -577,31 +576,10 @@ static void sdma_flush_descq(struct sdma_engine *sde)
head = ++sde->descq_head & sde->sdma_mask;
/* if now past this txp's descs, do the callback */
if (txp && txp->next_descq_idx == head) {
- int drained = 0;
- /* protect against complete modifying */
- struct iowait *wait = txp->wait;
-
/* remove from list */
sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
- if (wait)
- drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
- trace_hfi1_sdma_out_sn(sde, txp->sn);
- if (WARN_ON_ONCE(sde->head_sn != txp->sn))
- dd_dev_err(sde->dd, "expected %llu got %llu\n",
- sde->head_sn, txp->sn);
- sde->head_sn++;
-#endif
- sdma_txclean(sde->dd, txp);
+ complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
trace_hfi1_sdma_progress(sde, head, tail, txp);
- if (txp->complete)
- (*txp->complete)(
- txp,
- SDMA_TXREQ_S_ABORTED,
- drained);
- if (wait && drained)
- iowait_drain_wakeup(wait);
- /* see if there is another txp */
txp = get_txhead(sde);
}
progress++;
@@ -612,7 +590,7 @@ static void sdma_flush_descq(struct sdma_engine *sde)
static void sdma_sw_clean_up_task(unsigned long opaque)
{
- struct sdma_engine *sde = (struct sdma_engine *) opaque;
+ struct sdma_engine *sde = (struct sdma_engine *)opaque;
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
@@ -627,7 +605,6 @@ static void sdma_sw_clean_up_task(unsigned long opaque)
* descq are ours to play with.
*/
-
/*
* In the error clean up sequence, software clean must be called
* before the hardware clean so we can use the hardware head in
@@ -676,7 +653,7 @@ static void sdma_start_hw_clean_up(struct sdma_engine *sde)
}
static void sdma_set_state(struct sdma_engine *sde,
- enum sdma_states next_state)
+ enum sdma_states next_state)
{
struct sdma_state *ss = &sde->state;
const struct sdma_set_state_action *action = sdma_action_table;
@@ -692,8 +669,8 @@ static void sdma_set_state(struct sdma_engine *sde,
ss->previous_op = ss->current_op;
ss->current_state = next_state;
- if (ss->previous_state != sdma_state_s99_running
- && next_state == sdma_state_s99_running)
+ if (ss->previous_state != sdma_state_s99_running &&
+ next_state == sdma_state_s99_running)
sdma_flush(sde);
if (action[next_state].op_enable)
@@ -890,6 +867,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
newmap->actual_vls = num_vls;
newmap->vls = roundup_pow_of_two(num_vls);
newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+ /* initialize back-map */
+ for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
+ newmap->engine_to_vl[i] = -1;
for (i = 0; i < newmap->vls; i++) {
/* save for wrap around */
int first_engine = engine;
@@ -913,6 +893,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
/* wrap back to first engine */
engine = first_engine;
}
+ /* assign back-map */
+ for (j = 0; j < vl_engines[i]; j++)
+ newmap->engine_to_vl[first_engine + j] = i;
} else {
/* just re-use entry without allocating */
newmap->map[i] = newmap->map[i % num_vls];
@@ -922,7 +905,7 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
/* newmap in hand, save old map */
spin_lock_irq(&dd->sde_map_lock);
oldmap = rcu_dereference_protected(dd->sdma_map,
- lockdep_is_held(&dd->sde_map_lock));
+ lockdep_is_held(&dd->sde_map_lock));
/* publish newmap */
rcu_assign_pointer(dd->sdma_map, newmap);
@@ -983,7 +966,7 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
sde->tx_ring = NULL;
}
spin_lock_irq(&dd->sde_map_lock);
- kfree(rcu_access_pointer(dd->sdma_map));
+ sdma_map_free(rcu_access_pointer(dd->sdma_map));
RCU_INIT_POINTER(dd->sdma_map, NULL);
spin_unlock_irq(&dd->sde_map_lock);
synchronize_rcu();
@@ -1020,19 +1003,19 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
return 0;
}
if (mod_num_sdma &&
- /* can't exceed chip support */
- mod_num_sdma <= dd->chip_sdma_engines &&
- /* count must be >= vls */
- mod_num_sdma >= num_vls)
+ /* can't exceed chip support */
+ mod_num_sdma <= dd->chip_sdma_engines &&
+ /* count must be >= vls */
+ mod_num_sdma >= num_vls)
num_engines = mod_num_sdma;
dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
- dd->chip_sdma_mem_size);
+ dd->chip_sdma_mem_size);
per_sdma_credits =
- dd->chip_sdma_mem_size/(num_engines * SDMA_BLOCK_SIZE);
+ dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
/* set up freeze waitqueue */
init_waitqueue_head(&dd->sdma_unfreeze_wq);
@@ -1040,7 +1023,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
descq_cnt = sdma_get_descq_cnt();
dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
- num_engines, descq_cnt);
+ num_engines, descq_cnt);
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
@@ -1061,18 +1044,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
sde->desc_avail = sdma_descq_freecnt(sde);
sde->sdma_shift = ilog2(descq_cnt);
sde->sdma_mask = (1 << sde->sdma_shift) - 1;
- sde->descq_full_count = 0;
-
- /* Create a mask for all 3 chip interrupt sources */
- sde->imask = (u64)1 << (0*TXE_NUM_SDMA_ENGINES + this_idx)
- | (u64)1 << (1*TXE_NUM_SDMA_ENGINES + this_idx)
- | (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
- /* Create a mask specifically for sdma_idle */
- sde->idle_mask =
- (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
- /* Create a mask specifically for sdma_progress */
- sde->progress_mask =
- (u64)1 << (TXE_NUM_SDMA_ENGINES + this_idx);
+
+ /* Create a mask specifically for each interrupt source */
+ sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
+ this_idx);
+ sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
+ this_idx);
+ sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
+ this_idx);
+ /* Create a combined mask to cover all 3 interrupt sources */
+ sde->imask = sde->int_mask | sde->progress_mask |
+ sde->idle_mask;
+
spin_lock_init(&sde->tail_lock);
seqlock_init(&sde->head_lock);
spin_lock_init(&sde->senddmactrl_lock);
@@ -1100,10 +1083,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
SDMA_DESC1_INT_REQ_FLAG;
tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
+ (unsigned long)sde);
tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ (unsigned long)sde);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
@@ -1251,11 +1234,10 @@ void sdma_exit(struct hfi1_devdata *dd)
for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
++this_idx) {
-
sde = &dd->per_sdma[this_idx];
if (!list_empty(&sde->dmawait))
dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
- sde->this_idx);
+ sde->this_idx);
sdma_process_event(sde, sdma_event_e00_go_hw_down);
del_timer_sync(&sde->err_progress_check_timer);
@@ -1358,8 +1340,8 @@ retry:
use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
(dd->flags & HFI1_HAS_SDMA_TIMEOUT);
hwhead = use_dmahead ?
- (u16) le64_to_cpu(*sde->head_dma) :
- (u16) read_sde_csr(sde, SD(HEAD));
+ (u16)le64_to_cpu(*sde->head_dma) :
+ (u16)read_sde_csr(sde, SD(HEAD));
if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
u16 cnt;
@@ -1385,9 +1367,9 @@ retry:
if (unlikely(!sane)) {
dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
- sde->this_idx,
- use_dmahead ? "dma" : "kreg",
- hwhead, swhead, swtail, cnt);
+ sde->this_idx,
+ use_dmahead ? "dma" : "kreg",
+ hwhead, swhead, swtail, cnt);
if (use_dmahead) {
/* try one more time, using csr */
use_dmahead = 0;
@@ -1464,7 +1446,7 @@ static void sdma_make_progress(struct sdma_engine *sde, u64 status)
{
struct sdma_txreq *txp = NULL;
int progress = 0;
- u16 hwhead, swhead, swtail;
+ u16 hwhead, swhead;
int idle_check_done = 0;
hwhead = sdma_gethead(sde);
@@ -1485,29 +1467,9 @@ retry:
/* if now past this txp's descs, do the callback */
if (txp && txp->next_descq_idx == swhead) {
- int drained = 0;
- /* protect against complete modifying */
- struct iowait *wait = txp->wait;
-
/* remove from list */
sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
- if (wait)
- drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
- trace_hfi1_sdma_out_sn(sde, txp->sn);
- if (WARN_ON_ONCE(sde->head_sn != txp->sn))
- dd_dev_err(sde->dd, "expected %llu got %llu\n",
- sde->head_sn, txp->sn);
- sde->head_sn++;
-#endif
- sdma_txclean(sde->dd, txp);
- if (txp->complete)
- (*txp->complete)(
- txp,
- SDMA_TXREQ_S_OK,
- drained);
- if (wait && drained)
- iowait_drain_wakeup(wait);
+ complete_tx(sde, txp, SDMA_TXREQ_S_OK);
/* see if there is another txp */
txp = get_txhead(sde);
}
@@ -1525,6 +1487,8 @@ retry:
* of sdma_make_progress(..) which is ensured by idle_check_done flag
*/
if ((status & sde->idle_mask) && !idle_check_done) {
+ u16 swtail;
+
swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
if (swtail != hwhead) {
hwhead = (u16)read_sde_csr(sde, SD(HEAD));
@@ -1552,6 +1516,12 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
trace_hfi1_sdma_engine_interrupt(sde, status);
write_seqlock(&sde->head_lock);
sdma_set_desc_cnt(sde, sdma_desct_intr);
+ if (status & sde->idle_mask)
+ sde->idle_int_cnt++;
+ else if (status & sde->progress_mask)
+ sde->progress_int_cnt++;
+ else if (status & sde->int_mask)
+ sde->sdma_int_cnt++;
sdma_make_progress(sde, status);
write_sequnlock(&sde->head_lock);
}
@@ -1577,10 +1547,10 @@ void sdma_engine_error(struct sdma_engine *sde, u64 status)
__sdma_process_event(sde, sdma_event_e60_hw_halted);
if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
dd_dev_err(sde->dd,
- "SDMA (%u) engine error: 0x%llx state %s\n",
- sde->this_idx,
- (unsigned long long)status,
- sdma_state_names[sde->state.current_state]);
+ "SDMA (%u) engine error: 0x%llx state %s\n",
+ sde->this_idx,
+ (unsigned long long)status,
+ sdma_state_names[sde->state.current_state]);
dump_sdma_state(sde);
}
write_sequnlock(&sde->head_lock);
@@ -1624,8 +1594,8 @@ static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
if (op & SDMA_SENDCTRL_OP_CLEANUP)
write_sde_csr(sde, SD(CTRL),
- sde->p_senddmactrl |
- SD(CTRL_SDMA_CLEANUP_SMASK));
+ sde->p_senddmactrl |
+ SD(CTRL_SDMA_CLEANUP_SMASK));
else
write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
@@ -1649,12 +1619,10 @@ static void sdma_setlengen(struct sdma_engine *sde)
* generation counter.
*/
write_sde_csr(sde, SD(LEN_GEN),
- (sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT)
- );
+ (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
write_sde_csr(sde, SD(LEN_GEN),
- ((sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT))
- | (4ULL << SD(LEN_GEN_GENERATION_SHIFT))
- );
+ ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
+ (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
}
static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
@@ -1714,7 +1682,6 @@ static void set_sdma_integrity(struct sdma_engine *sde)
write_sde_csr(sde, SD(CHECK_ENABLE), reg);
}
-
static void init_sdma_regs(
struct sdma_engine *sde,
u32 credits,
@@ -1735,17 +1702,16 @@ static void init_sdma_regs(
write_sde_csr(sde, SD(DESC_CNT), 0);
write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
write_sde_csr(sde, SD(MEMORY),
- ((u64)credits <<
- SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
- ((u64)(credits * sde->this_idx) <<
- SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
+ ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
+ ((u64)(credits * sde->this_idx) <<
+ SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
set_sdma_integrity(sde);
opmask = OPCODE_CHECK_MASK_DISABLED;
opval = OPCODE_CHECK_VAL_DISABLED;
write_sde_csr(sde, SD(CHECK_OPCODE),
- (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
- (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
+ (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
+ (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
}
#ifdef CONFIG_SDMA_VERBOSITY
@@ -1824,12 +1790,9 @@ static void dump_sdma_state(struct sdma_engine *sde)
descq = sde->descq;
dd_dev_err(sde->dd,
- "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
- sde->this_idx,
- head,
- tail,
- cnt,
- !list_empty(&sde->flushlist));
+ "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
+ sde->this_idx, head, tail, cnt,
+ !list_empty(&sde->flushlist));
/* print info for each entry in the descriptor queue */
while (head != tail) {
@@ -1850,20 +1813,23 @@ static void dump_sdma_state(struct sdma_engine *sde)
len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
& SDMA_DESC0_BYTE_COUNT_MASK;
dd_dev_err(sde->dd,
- "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
- head, flags, addr, gen, len);
+ "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+ head, flags, addr, gen, len);
dd_dev_err(sde->dd,
- "\tdesc0:0x%016llx desc1 0x%016llx\n",
- desc[0], desc[1]);
+ "\tdesc0:0x%016llx desc1 0x%016llx\n",
+ desc[0], desc[1]);
if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
dd_dev_err(sde->dd,
- "\taidx: %u amode: %u alen: %u\n",
- (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
- >> SDMA_DESC1_HEADER_INDEX_SHIFT),
- (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
- >> SDMA_DESC1_HEADER_MODE_SHIFT),
- (u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK)
- >> SDMA_DESC1_HEADER_DWS_SHIFT));
+ "\taidx: %u amode: %u alen: %u\n",
+ (u8)((desc[1] &
+ SDMA_DESC1_HEADER_INDEX_SMASK) >>
+ SDMA_DESC1_HEADER_INDEX_SHIFT),
+ (u8)((desc[1] &
+ SDMA_DESC1_HEADER_MODE_SMASK) >>
+ SDMA_DESC1_HEADER_MODE_SHIFT),
+ (u8)((desc[1] &
+ SDMA_DESC1_HEADER_DWS_SMASK) >>
+ SDMA_DESC1_HEADER_DWS_SHIFT));
head++;
head &= sde->sdma_mask;
}
@@ -1890,29 +1856,26 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
head = sde->descq_head & sde->sdma_mask;
tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
seq_printf(s, SDE_FMT, sde->this_idx,
- sde->cpu,
- sdma_state_name(sde->state.current_state),
- (unsigned long long)read_sde_csr(sde, SD(CTRL)),
- (unsigned long long)read_sde_csr(sde, SD(STATUS)),
- (unsigned long long)read_sde_csr(sde,
- SD(ENG_ERR_STATUS)),
- (unsigned long long)read_sde_csr(sde, SD(TAIL)),
- tail,
- (unsigned long long)read_sde_csr(sde, SD(HEAD)),
- head,
- (unsigned long long)le64_to_cpu(*sde->head_dma),
- (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
- (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
- (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
- (unsigned long long)sde->last_status,
- (unsigned long long)sde->ahg_bits,
- sde->tx_tail,
- sde->tx_head,
- sde->descq_tail,
- sde->descq_head,
+ sde->cpu,
+ sdma_state_name(sde->state.current_state),
+ (unsigned long long)read_sde_csr(sde, SD(CTRL)),
+ (unsigned long long)read_sde_csr(sde, SD(STATUS)),
+ (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
+ (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
+ (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
+ (unsigned long long)le64_to_cpu(*sde->head_dma),
+ (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
+ (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
+ (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
+ (unsigned long long)sde->last_status,
+ (unsigned long long)sde->ahg_bits,
+ sde->tx_tail,
+ sde->tx_head,
+ sde->descq_tail,
+ sde->descq_head,
!list_empty(&sde->flushlist),
- sde->descq_full_count,
- (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
+ sde->descq_full_count,
+ (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
/* print info for each entry in the descriptor queue */
while (head != tail) {
@@ -1933,14 +1896,16 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
& SDMA_DESC0_BYTE_COUNT_MASK;
seq_printf(s,
- "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
- head, flags, addr, gen, len);
+ "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+ head, flags, addr, gen, len);
if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
- (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
- >> SDMA_DESC1_HEADER_INDEX_SHIFT),
- (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
- >> SDMA_DESC1_HEADER_MODE_SHIFT));
+ (u8)((desc[1] &
+ SDMA_DESC1_HEADER_INDEX_SMASK) >>
+ SDMA_DESC1_HEADER_INDEX_SHIFT),
+ (u8)((desc[1] &
+ SDMA_DESC1_HEADER_MODE_SMASK) >>
+ SDMA_DESC1_HEADER_MODE_SHIFT));
head = (head + 1) & sde->sdma_mask;
}
}
@@ -2041,8 +2006,9 @@ static int sdma_check_progress(
ret = wait->sleep(sde, wait, tx, seq);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
- } else
+ } else {
ret = -EBUSY;
+ }
return ret;
}
@@ -2080,14 +2046,14 @@ retry:
goto nodesc;
tail = submit_tx(sde, tx);
if (wait)
- atomic_inc(&wait->sdma_busy);
+ iowait_sdma_inc(wait);
sdma_update_tail(sde, tail);
unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret;
unlock_noconn:
if (wait)
- atomic_inc(&wait->sdma_busy);
+ iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@@ -2132,13 +2098,12 @@ nodesc:
* side locking.
*
* Return:
- * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring
- * (wait == NULL)
+ * > 0 - Success (value is number of sdma_txreq's submitted),
+ * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
-int sdma_send_txlist(struct sdma_engine *sde,
- struct iowait *wait,
- struct list_head *tx_list)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
+ struct list_head *tx_list)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
@@ -2169,18 +2134,18 @@ retry:
}
update_tail:
if (wait)
- atomic_add(count, &wait->sdma_busy);
+ iowait_sdma_add(wait, count);
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
- return ret;
+ return ret == 0 ? count : ret;
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
list_del_init(&tx->list);
if (wait)
- atomic_inc(&wait->sdma_busy);
+ iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@@ -2206,8 +2171,7 @@ nodesc:
goto update_tail;
}
-static void sdma_process_event(struct sdma_engine *sde,
- enum sdma_events event)
+static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
{
unsigned long flags;
@@ -2224,7 +2188,7 @@ static void sdma_process_event(struct sdma_engine *sde,
}
static void __sdma_process_event(struct sdma_engine *sde,
- enum sdma_events event)
+ enum sdma_events event)
{
struct sdma_state *ss = &sde->state;
int need_progress = 0;
@@ -2247,14 +2211,15 @@ static void __sdma_process_event(struct sdma_engine *sde,
* of link up, then we need to start up.
* This can happen when hw down is requested while
* bringing the link up with traffic active on
- * 7220, e.g. */
+ * 7220, e.g.
+ */
ss->go_s99_running = 1;
/* fall through and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
sdma_set_state(sde,
- sdma_state_s10_hw_start_up_halt_wait);
+ sdma_state_s10_hw_start_up_halt_wait);
break;
case sdma_event_e15_hw_halt_done:
break;
@@ -2292,7 +2257,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
break;
case sdma_event_e15_hw_halt_done:
sdma_set_state(sde,
- sdma_state_s15_hw_start_up_clean_wait);
+ sdma_state_s15_hw_start_up_clean_wait);
sdma_start_hw_clean_up(sde);
break;
case sdma_event_e25_hw_clean_up_done:
@@ -2767,7 +2732,7 @@ enomem:
* This function calls _extend_sdma_tx_descs to extend or allocate
* coalesce buffer. If there is a allocated coalesce buffer, it will
* copy the input packet data into the coalesce buffer. It also adds
- * coalesce buffer descriptor once whe whole packet is received.
+ * coalesce buffer descriptor once when whole packet is received.
*
* Return:
* <0 - error
@@ -3030,7 +2995,8 @@ void sdma_freeze(struct hfi1_devdata *dd)
* continuing.
*/
ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
- atomic_read(&dd->sdma_unfreeze_count) <= 0);
+ atomic_read(&dd->sdma_unfreeze_count) <=
+ 0);
/* interrupted or count is negative, then unloading - just exit */
if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
return;
@@ -3047,7 +3013,7 @@ void sdma_freeze(struct hfi1_devdata *dd)
* software clean will read engine CSRs, so must be completed before
* the next step, which will clear the engine CSRs.
*/
- (void) wait_event_interruptible(dd->sdma_unfreeze_wq,
+ (void)wait_event_interruptible(dd->sdma_unfreeze_wq,
atomic_read(&dd->sdma_unfreeze_count) <= 0);
/* no need to check results - done no matter what */
}
@@ -3067,7 +3033,7 @@ void sdma_unfreeze(struct hfi1_devdata *dd)
/* tell all engines start freeze clean up */
for (i = 0; i < dd->num_sdma; i++)
sdma_process_event(&dd->per_sdma[i],
- sdma_event_e82_hw_unfreeze);
+ sdma_event_e82_hw_unfreeze);
}
/**
@@ -3081,5 +3047,6 @@ void _sdma_engine_progress_schedule(
trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
/* assume we have selected a good cpu */
write_csr(sde->dd,
- CCE_INT_FORCE + (8*(IS_SDMA_START/64)), sde->progress_mask);
+ CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
+ sde->progress_mask);
}
diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h
index da89e6458162..8f50c99fe711 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/staging/rdma/hfi1/sdma.h
@@ -1,14 +1,13 @@
#ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -58,15 +55,13 @@
#include "hfi.h"
#include "verbs.h"
+#include "sdma_txreq.h"
-/* increased for AHG */
-#define NUM_DESC 6
/* Hardware limit */
#define MAX_DESC 64
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
-
#define SDMA_TXREQ_S_OK 0
#define SDMA_TXREQ_S_SENDERROR 1
#define SDMA_TXREQ_S_ABORTED 2
@@ -109,8 +104,8 @@
/*
* Bits defined in the send DMA descriptor.
*/
-#define SDMA_DESC0_FIRST_DESC_FLAG (1ULL << 63)
-#define SDMA_DESC0_LAST_DESC_FLAG (1ULL << 62)
+#define SDMA_DESC0_FIRST_DESC_FLAG BIT_ULL(63)
+#define SDMA_DESC0_LAST_DESC_FLAG BIT_ULL(62)
#define SDMA_DESC0_BYTE_COUNT_SHIFT 48
#define SDMA_DESC0_BYTE_COUNT_WIDTH 14
#define SDMA_DESC0_BYTE_COUNT_MASK \
@@ -154,8 +149,8 @@
((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
#define SDMA_DESC1_GENERATION_SMASK \
(SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
-#define SDMA_DESC1_INT_REQ_FLAG (1ULL << 1)
-#define SDMA_DESC1_HEAD_TO_HOST_FLAG (1ULL << 0)
+#define SDMA_DESC1_INT_REQ_FLAG BIT_ULL(1)
+#define SDMA_DESC1_HEAD_TO_HOST_FLAG BIT_ULL(0)
enum sdma_states {
sdma_state_s00_hw_down,
@@ -311,83 +306,6 @@ struct hw_sdma_desc {
__le64 qw[2];
};
-/*
- * struct sdma_desc - canonical fragment descriptor
- *
- * This is the descriptor carried in the tx request
- * corresponding to each fragment.
- *
- */
-struct sdma_desc {
- /* private: don't use directly */
- u64 qw[2];
-};
-
-struct sdma_txreq;
-typedef void (*callback_t)(struct sdma_txreq *, int, int);
-
-/**
- * struct sdma_txreq - the sdma_txreq structure (one per packet)
- * @list: for use by user and by queuing for wait
- *
- * This is the representation of a packet which consists of some
- * number of fragments. Storage is provided to within the structure.
- * for all fragments.
- *
- * The storage for the descriptors are automatically extended as needed
- * when the currently allocation is exceeded.
- *
- * The user (Verbs or PSM) may overload this structure with fields
- * specific to their use by putting this struct first in their struct.
- * The method of allocation of the overloaded structure is user dependent
- *
- * The list is the only public field in the structure.
- *
- */
-
-struct sdma_txreq {
- struct list_head list;
- /* private: */
- struct sdma_desc *descp;
- /* private: */
- void *coalesce_buf;
- /* private: */
- u16 coalesce_idx;
- /* private: */
- struct iowait *wait;
- /* private: */
- callback_t complete;
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
- u64 sn;
-#endif
- /* private: - used in coalesce/pad processing */
- u16 packet_len;
- /* private: - down-counted to trigger last */
- u16 tlen;
- /* private: flags */
- u16 flags;
- /* private: */
- u16 num_desc;
- /* private: */
- u16 desc_limit;
- /* private: */
- u16 next_descq_idx;
- /* private: */
- struct sdma_desc descs[NUM_DESC];
-};
-
-struct verbs_txreq {
- struct hfi1_pio_header phdr;
- struct sdma_txreq txreq;
- struct hfi1_qp *qp;
- struct hfi1_swqe *wqe;
- struct hfi1_mregion *mr;
- struct hfi1_sge_state *ss;
- struct sdma_engine *sde;
- u16 hdr_dwords;
- u16 hdr_inx;
-};
-
/**
* struct sdma_engine - Data pertaining to each SDMA engine.
* @dd: a back-pointer to the device data
@@ -409,6 +327,7 @@ struct sdma_engine {
u64 imask; /* clear interrupt mask */
u64 idle_mask;
u64 progress_mask;
+ u64 int_mask;
/* private: */
volatile __le64 *head_dma; /* DMA'ed by chip */
/* private: */
@@ -465,6 +384,12 @@ struct sdma_engine {
u16 tx_head;
/* private: */
u64 last_status;
+ /* private */
+ u64 err_cnt;
+ /* private */
+ u64 sdma_int_cnt;
+ u64 idle_int_cnt;
+ u64 progress_int_cnt;
/* private: */
struct list_head dmawait;
@@ -484,12 +409,12 @@ struct sdma_engine {
u32 progress_check_head;
/* private: */
struct work_struct flush_worker;
+ /* protect flush list */
spinlock_t flushlist_lock;
/* private: */
struct list_head flushlist;
};
-
int sdma_init(struct hfi1_devdata *dd, u8 port);
void sdma_start(struct hfi1_devdata *dd);
void sdma_exit(struct hfi1_devdata *dd);
@@ -535,7 +460,6 @@ static inline int __sdma_running(struct sdma_engine *engine)
return engine->state.current_state == sdma_state_s99_running;
}
-
/**
* sdma_running() - state suitability test
* @engine: sdma engine
@@ -565,7 +489,6 @@ void _sdma_txreq_ahgadd(
u32 *ahg,
u8 ahg_hlen);
-
/**
* sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
* @tx: tx request to initialize
@@ -626,7 +549,7 @@ static inline int sdma_txinit_ahg(
u8 num_ahg,
u32 *ahg,
u8 ahg_hlen,
- void (*cb)(struct sdma_txreq *, int, int))
+ void (*cb)(struct sdma_txreq *, int))
{
if (tlen == 0)
return -ENODATA;
@@ -640,7 +563,8 @@ static inline int sdma_txinit_ahg(
tx->complete = cb;
tx->coalesce_buf = NULL;
tx->wait = NULL;
- tx->tlen = tx->packet_len = tlen;
+ tx->packet_len = tlen;
+ tx->tlen = tx->packet_len;
tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
tx->descs[0].qw[1] = 0;
if (flags & SDMA_TXREQ_F_AHG_COPY)
@@ -689,7 +613,7 @@ static inline int sdma_txinit(
struct sdma_txreq *tx,
u16 flags,
u16 tlen,
- void (*cb)(struct sdma_txreq *, int, int))
+ void (*cb)(struct sdma_txreq *, int))
{
return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
}
@@ -753,7 +677,7 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
dd->default_desc1;
if (tx->flags & SDMA_TXREQ_F_URGENT)
tx->descp[tx->num_desc].qw[1] |=
- (SDMA_DESC1_HEAD_TO_HOST_FLAG|
+ (SDMA_DESC1_HEAD_TO_HOST_FLAG |
SDMA_DESC1_INT_REQ_FLAG);
}
@@ -1080,6 +1004,7 @@ struct sdma_map_elem {
/**
* struct sdma_map_el - mapping for a vl
+ * @engine_to_vl - map of an engine to a vl
* @list - rcu head for free callback
* @mask - vl mask to "mod" the vl to produce an index to map array
* @actual_vls - number of vls
@@ -1091,6 +1016,7 @@ struct sdma_map_elem {
* in turn point to an array of sde's for that vl.
*/
struct sdma_vl_map {
+ s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
struct rcu_head list;
u32 mask;
u8 actual_vls;
diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h
new file mode 100644
index 000000000000..bf7d777d756e
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/sdma_txreq.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_SDMA_TXREQ_H
+#define HFI1_SDMA_TXREQ_H
+
+/* increased for AHG */
+#define NUM_DESC 6
+
+/*
+ * struct sdma_desc - canonical fragment descriptor
+ *
+ * This is the descriptor carried in the tx request
+ * corresponding to each fragment.
+ *
+ */
+struct sdma_desc {
+ /* private: don't use directly */
+ u64 qw[2];
+};
+
+/**
+ * struct sdma_txreq - the sdma_txreq structure (one per packet)
+ * @list: for use by user and by queuing for wait
+ *
+ * This is the representation of a packet which consists of some
+ * number of fragments. Storage is provided to within the structure.
+ * for all fragments.
+ *
+ * The storage for the descriptors are automatically extended as needed
+ * when the currently allocation is exceeded.
+ *
+ * The user (Verbs or PSM) may overload this structure with fields
+ * specific to their use by putting this struct first in their struct.
+ * The method of allocation of the overloaded structure is user dependent
+ *
+ * The list is the only public field in the structure.
+ *
+ */
+
+#define SDMA_TXREQ_S_OK 0
+#define SDMA_TXREQ_S_SENDERROR 1
+#define SDMA_TXREQ_S_ABORTED 2
+#define SDMA_TXREQ_S_SHUTDOWN 3
+
+/* flags bits */
+#define SDMA_TXREQ_F_URGENT 0x0001
+#define SDMA_TXREQ_F_AHG_COPY 0x0002
+#define SDMA_TXREQ_F_USE_AHG 0x0004
+
+struct sdma_txreq;
+typedef void (*callback_t)(struct sdma_txreq *, int);
+
+struct iowait;
+struct sdma_txreq {
+ struct list_head list;
+ /* private: */
+ struct sdma_desc *descp;
+ /* private: */
+ void *coalesce_buf;
+ /* private: */
+ struct iowait *wait;
+ /* private: */
+ callback_t complete;
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+ u64 sn;
+#endif
+ /* private: - used in coalesce/pad processing */
+ u16 packet_len;
+ /* private: - down-counted to trigger last */
+ u16 tlen;
+ /* private: */
+ u16 num_desc;
+ /* private: */
+ u16 desc_limit;
+ /* private: */
+ u16 next_descq_idx;
+ /* private: */
+ u16 coalesce_idx;
+ /* private: flags */
+ u16 flags;
+ /* private: */
+ struct sdma_desc descs[NUM_DESC];
+};
+
+static inline int sdma_txreq_built(struct sdma_txreq *tx)
+{
+ return tx->num_desc;
+}
+
+#endif /* HFI1_SDMA_TXREQ_H */
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c
index 1dd6727dd5ef..c7f1271190af 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/staging/rdma/hfi1/sysfs.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -53,7 +50,6 @@
#include "mad.h"
#include "trace.h"
-
/*
* Start of per-port congestion control structures and support code
*/
@@ -62,8 +58,8 @@
* Congestion control table size followed by table entries
*/
static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t pos, size_t count)
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
int ret;
struct hfi1_pportdata *ppd =
@@ -84,7 +80,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
rcu_read_lock();
cc_state = get_cc_state(ppd);
- if (cc_state == NULL) {
+ if (!cc_state) {
rcu_read_unlock();
return -EINVAL;
}
@@ -99,10 +95,6 @@ static void port_release(struct kobject *kobj)
/* nothing to do since memory is freed by hfi1_free_devdata() */
}
-static struct kobj_type port_cc_ktype = {
- .release = port_release,
-};
-
static struct bin_attribute cc_table_bin_attr = {
.attr = {.name = "cc_table_bin", .mode = 0444},
.read = read_cc_table_bin,
@@ -115,8 +107,8 @@ static struct bin_attribute cc_table_bin_attr = {
* trigger threshold and the minimum injection rate delay.
*/
static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t pos, size_t count)
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
int ret;
struct hfi1_pportdata *ppd =
@@ -135,7 +127,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
rcu_read_lock();
cc_state = get_cc_state(ppd);
- if (cc_state == NULL) {
+ if (!cc_state) {
rcu_read_unlock();
return -EINVAL;
}
@@ -151,6 +143,68 @@ static struct bin_attribute cc_setting_bin_attr = {
.size = PAGE_SIZE,
};
+struct hfi1_port_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct hfi1_pportdata *, char *);
+ ssize_t (*store)(struct hfi1_pportdata *, const char *, size_t);
+};
+
+static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
+{
+ return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+}
+
+static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ if (!memcmp(buf, "on", 2))
+ ppd->cc_prescan = true;
+ else if (!memcmp(buf, "off", 3))
+ ppd->cc_prescan = false;
+
+ return count;
+}
+
+static struct hfi1_port_attr cc_prescan_attr =
+ __ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store);
+
+static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct hfi1_port_attr *port_attr =
+ container_of(attr, struct hfi1_port_attr, attr);
+ struct hfi1_pportdata *ppd =
+ container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+ return port_attr->show(ppd, buf);
+}
+
+static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hfi1_port_attr *port_attr =
+ container_of(attr, struct hfi1_port_attr, attr);
+ struct hfi1_pportdata *ppd =
+ container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+ return port_attr->store(ppd, buf, count);
+}
+
+static const struct sysfs_ops port_cc_sysfs_ops = {
+ .show = cc_attr_show,
+ .store = cc_attr_store
+};
+
+static struct attribute *port_cc_default_attributes[] = {
+ &cc_prescan_attr.attr
+};
+
+static struct kobj_type port_cc_ktype = {
+ .release = port_release,
+ .sysfs_ops = &port_cc_sysfs_ops,
+ .default_attrs = port_cc_default_attributes
+};
+
/* Start sc2vl */
#define HFI1_SC2VL_ATTR(N) \
static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
@@ -196,7 +250,6 @@ HFI1_SC2VL_ATTR(29);
HFI1_SC2VL_ATTR(30);
HFI1_SC2VL_ATTR(31);
-
static struct attribute *sc2vl_default_attributes[] = {
&hfi1_sc2vl_attr_0.attr,
&hfi1_sc2vl_attr_1.attr,
@@ -302,7 +355,6 @@ HFI1_SL2SC_ATTR(29);
HFI1_SL2SC_ATTR(30);
HFI1_SL2SC_ATTR(31);
-
static struct attribute *sl2sc_default_attributes[] = {
&hfi1_sl2sc_attr_0.attr,
&hfi1_sl2sc_attr_1.attr,
@@ -435,7 +487,6 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
.default_attrs = vl2mtu_default_attributes
};
-
/* end of per-port file structures and support code */
/*
@@ -446,7 +497,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
@@ -455,7 +506,7 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@@ -470,19 +521,18 @@ static ssize_t show_boardversion(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
-
static ssize_t show_nctxts(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/*
@@ -497,10 +547,10 @@ static ssize_t show_nctxts(struct device *device,
}
static ssize_t show_nfreectxts(struct device *device,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
@@ -511,11 +561,10 @@ static ssize_t show_serial(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
-
}
static ssize_t store_chip_reset(struct device *device,
@@ -523,7 +572,7 @@ static ssize_t store_chip_reset(struct device *device,
size_t count)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@@ -552,7 +601,7 @@ static ssize_t show_tempsense(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, ibdev.dev);
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
struct hfi1_temp temp;
int ret;
@@ -608,8 +657,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
if (!port_num || port_num > dd->num_pports) {
dd_dev_err(dd,
- "Skipping infiniband class with invalid port %u\n",
- port_num);
+ "Skipping infiniband class with invalid port %u\n",
+ port_num);
return -ENODEV;
}
ppd = &dd->pport[port_num - 1];
@@ -644,39 +693,36 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
}
kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
-
ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
kobj, "CCMgtA");
if (ret) {
dd_dev_err(dd,
- "Skipping Congestion Control sysfs info, (err %d) port %u\n",
- ret, port_num);
+ "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+ ret, port_num);
goto bail_vl2mtu;
}
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
- ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
- &cc_setting_bin_attr);
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
if (ret) {
dd_dev_err(dd,
- "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
- ret, port_num);
+ "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+ ret, port_num);
goto bail_cc;
}
- ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
- &cc_table_bin_attr);
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr);
if (ret) {
dd_dev_err(dd,
- "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
- ret, port_num);
+ "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+ ret, port_num);
goto bail_cc_entry_bin;
}
dd_dev_info(dd,
- "IB%u: Congestion Control Agent enabled for port %d\n",
- dd->unit, port_num);
+ "IB%u: Congestion Control Agent enabled for port %d\n",
+ dd->unit, port_num);
return 0;
@@ -700,7 +746,7 @@ bail:
*/
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
{
- struct ib_device *dev = &dd->verbs_dev.ibdev;
+ struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
int i, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c
index 10122e84cb2f..8b62fefcf903 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/staging/rdma/hfi1/trace.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -109,17 +106,17 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
trace_seq_printf(p, IMM_PRN,
- be32_to_cpu(eh->imm_data));
+ be32_to_cpu(eh->imm_data));
break;
/* reth + imm */
case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, RETH_PRN " " IMM_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
- be32_to_cpu(eh->rc.reth.rkey),
- be32_to_cpu(eh->rc.reth.length),
- be32_to_cpu(eh->rc.imm_data));
+ (unsigned long long)ib_u64_get(
+ (__be32 *)&eh->rc.reth.vaddr),
+ be32_to_cpu(eh->rc.reth.rkey),
+ be32_to_cpu(eh->rc.reth.length),
+ be32_to_cpu(eh->rc.imm_data));
break;
/* reth */
case OP(RC, RDMA_READ_REQUEST):
@@ -128,10 +125,10 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY):
case OP(UC, RDMA_WRITE_ONLY):
trace_seq_printf(p, RETH_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
- be32_to_cpu(eh->rc.reth.rkey),
- be32_to_cpu(eh->rc.reth.length));
+ (unsigned long long)ib_u64_get(
+ (__be32 *)&eh->rc.reth.vaddr),
+ be32_to_cpu(eh->rc.reth.rkey),
+ be32_to_cpu(eh->rc.reth.length));
break;
case OP(RC, RDMA_READ_RESPONSE_FIRST):
case OP(RC, RDMA_READ_RESPONSE_LAST):
@@ -154,19 +151,20 @@ const char *parse_everbs_hdrs(
case OP(RC, COMPARE_SWAP):
case OP(RC, FETCH_ADD):
trace_seq_printf(p, ATOMICETH_PRN,
- (unsigned long long)ib_u64_get(eh->atomic_eth.vaddr),
- eh->atomic_eth.rkey,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.swap_data),
- (unsigned long long) ib_u64_get(
+ (unsigned long long)ib_u64_get(
+ eh->atomic_eth.vaddr),
+ eh->atomic_eth.rkey,
+ (unsigned long long)ib_u64_get(
+ (__be32 *)&eh->atomic_eth.swap_data),
+ (unsigned long long)ib_u64_get(
(__be32 *)&eh->atomic_eth.compare_data));
break;
/* deth */
case OP(UD, SEND_ONLY):
case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN,
- be32_to_cpu(eh->ud.deth[0]),
- be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK);
+ be32_to_cpu(eh->ud.deth[0]),
+ be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
break;
}
trace_seq_putc(p, 0);
@@ -187,12 +185,12 @@ const char *parse_sdma_flags(
trace_seq_printf(p, "%s", flags);
if (desc0 & SDMA_DESC0_FIRST_DESC_FLAG)
trace_seq_printf(p, " amode:%u aidx:%u alen:%u",
- (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT)
- & SDMA_DESC1_HEADER_MODE_MASK),
- (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT)
- & SDMA_DESC1_HEADER_INDEX_MASK),
- (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT)
- & SDMA_DESC1_HEADER_DWS_MASK));
+ (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT) &
+ SDMA_DESC1_HEADER_MODE_MASK),
+ (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT) &
+ SDMA_DESC1_HEADER_INDEX_MASK),
+ (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT) &
+ SDMA_DESC1_HEADER_DWS_MASK));
return ret;
}
@@ -234,3 +232,4 @@ __hfi1_trace_fn(DC8051);
__hfi1_trace_fn(FIRMWARE);
__hfi1_trace_fn(RCVCTRL);
__hfi1_trace_fn(TID);
+__hfi1_trace_fn(MMU);
diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h
index 86c12ebfd4f0..963dc948c38a 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/staging/rdma/hfi1/trace.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -76,304 +73,295 @@ __print_symbolic(etype, \
#define TRACE_SYSTEM hfi1_rx
TRACE_EVENT(hfi1_rcvhdr,
- TP_PROTO(struct hfi1_devdata *dd,
- u64 eflags,
- u32 ctxt,
- u32 etype,
- u32 hlen,
- u32 tlen,
- u32 updegr,
- u32 etail),
- TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u64, eflags)
- __field(u32, ctxt)
- __field(u32, etype)
- __field(u32, hlen)
- __field(u32, tlen)
- __field(u32, updegr)
- __field(u32, etail)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->eflags = eflags;
- __entry->ctxt = ctxt;
- __entry->etype = etype;
- __entry->hlen = hlen;
- __entry->tlen = tlen;
- __entry->updegr = updegr;
- __entry->etail = etail;
- ),
- TP_printk(
-"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
- __get_str(dev),
- __entry->ctxt,
- __entry->eflags,
- __entry->etype, show_packettype(__entry->etype),
- __entry->hlen,
- __entry->tlen,
- __entry->updegr,
- __entry->etail
- )
+ TP_PROTO(struct hfi1_devdata *dd,
+ u64 eflags,
+ u32 ctxt,
+ u32 etype,
+ u32 hlen,
+ u32 tlen,
+ u32 updegr,
+ u32 etail
+ ),
+ TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, etype)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->eflags = eflags;
+ __entry->ctxt = ctxt;
+ __entry->etype = etype;
+ __entry->hlen = hlen;
+ __entry->tlen = tlen;
+ __entry->updegr = updegr;
+ __entry->etail = etail;
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->etype, show_packettype(__entry->etype),
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
);
TRACE_EVENT(hfi1_receive_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
- TP_ARGS(dd, ctxt),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u32, ctxt)
- __field(u8, slow_path)
- __field(u8, dma_rtail)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
- ),
- TP_printk(
- "[%s] ctxt %d SlowPath: %d DmaRtail: %d",
- __get_str(dev),
- __entry->ctxt,
- __entry->slow_path,
- __entry->dma_rtail
- )
+ TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+ TP_ARGS(dd, ctxt),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u32, ctxt)
+ __field(u8, slow_path)
+ __field(u8, dma_rtail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt) {
+ __entry->slow_path = 1;
+ __entry->dma_rtail = 0xFF;
+ } else if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt_dma_rtail){
+ __entry->dma_rtail = 1;
+ __entry->slow_path = 0;
+ } else if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt_nodma_rtail) {
+ __entry->dma_rtail = 0;
+ __entry->slow_path = 0;
+ }
+ ),
+ TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->slow_path,
+ __entry->dma_rtail
+ )
);
-const char *print_u64_array(struct trace_seq *, u64 *, int);
+TRACE_EVENT(hfi1_exp_tid_reg,
+ TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr,
+ u32 npages, unsigned long va, unsigned long pa,
+ dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+ TP_STRUCT__entry(
+ __field(unsigned, ctxt)
+ __field(u16, subctxt)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
+ )
+ );
-TRACE_EVENT(hfi1_exp_tid_map,
- TP_PROTO(unsigned ctxt, u16 subctxt, int dir,
- unsigned long *maps, u16 count),
- TP_ARGS(ctxt, subctxt, dir, maps, count),
+TRACE_EVENT(hfi1_exp_tid_unreg,
+ TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
TP_STRUCT__entry(
__field(unsigned, ctxt)
__field(u16, subctxt)
- __field(int, dir)
- __field(u16, count)
- __dynamic_array(unsigned long, maps, sizeof(*maps) * count)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __entry->dir = dir;
- __entry->count = count;
- memcpy(__get_dynamic_array(maps), maps,
- sizeof(*maps) * count);
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
),
- TP_printk("[%3u:%02u] %s tidmaps %s",
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
__entry->ctxt,
__entry->subctxt,
- (__entry->dir ? ">" : "<"),
- print_u64_array(p, __get_dynamic_array(maps),
- __entry->count)
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
)
);
-TRACE_EVENT(hfi1_exp_rcv_set,
- TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
- unsigned long vaddr, u64 phys_addr, void *page),
- TP_ARGS(ctxt, subctxt, tid, vaddr, phys_addr, page),
+TRACE_EVENT(hfi1_exp_tid_inval,
+ TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr,
+ u32 npages, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
TP_STRUCT__entry(
__field(unsigned, ctxt)
__field(u16, subctxt)
- __field(u32, tid)
- __field(unsigned long, vaddr)
- __field(u64, phys_addr)
- __field(void *, page)
+ __field(unsigned long, va)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __entry->tid = tid;
- __entry->vaddr = vaddr;
- __entry->phys_addr = phys_addr;
- __entry->page = page;
+ __entry->va = va;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->dma = dma;
),
- TP_printk("[%u:%u] TID %u, vaddrs 0x%lx, physaddr 0x%llx, pgp %p",
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
__entry->ctxt,
__entry->subctxt,
- __entry->tid,
- __entry->vaddr,
- __entry->phys_addr,
- __entry->page
+ __entry->rarr,
+ __entry->npages,
+ __entry->va,
+ __entry->dma
)
);
-TRACE_EVENT(hfi1_exp_rcv_free,
- TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
- unsigned long phys, void *page),
- TP_ARGS(ctxt, subctxt, tid, phys, page),
+TRACE_EVENT(hfi1_mmu_invalidate,
+ TP_PROTO(unsigned ctxt, u16 subctxt, const char *type,
+ unsigned long start, unsigned long end),
+ TP_ARGS(ctxt, subctxt, type, start, end),
TP_STRUCT__entry(
__field(unsigned, ctxt)
__field(u16, subctxt)
- __field(u32, tid)
- __field(unsigned long, phys)
- __field(void *, page)
+ __string(type, type)
+ __field(unsigned long, start)
+ __field(unsigned long, end)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __entry->tid = tid;
- __entry->phys = phys;
- __entry->page = page;
+ __assign_str(type, type);
+ __entry->start = start;
+ __entry->end = end;
),
- TP_printk("[%u:%u] freeing TID %u, 0x%lx, pgp %p",
+ TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
__entry->ctxt,
__entry->subctxt,
- __entry->tid,
- __entry->phys,
- __entry->page
+ __get_str(type),
+ __entry->start,
+ __entry->end
)
);
+
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_tx
TRACE_EVENT(hfi1_piofree,
- TP_PROTO(struct send_context *sc, int extra),
- TP_ARGS(sc, extra),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sc->dd)
- __field(u32, sw_index)
- __field(u32, hw_context)
- __field(int, extra)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sc->dd);
- __entry->sw_index = sc->sw_index;
- __entry->hw_context = sc->hw_context;
- __entry->extra = extra;
- ),
- TP_printk(
- "[%s] ctxt %u(%u) extra %d",
- __get_str(dev),
- __entry->sw_index,
- __entry->hw_context,
- __entry->extra
- )
+ TP_PROTO(struct send_context *sc, int extra),
+ TP_ARGS(sc, extra),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+ __field(u32, sw_index)
+ __field(u32, hw_context)
+ __field(int, extra)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+ __entry->sw_index = sc->sw_index;
+ __entry->hw_context = sc->hw_context;
+ __entry->extra = extra;
+ ),
+ TP_printk("[%s] ctxt %u(%u) extra %d",
+ __get_str(dev),
+ __entry->sw_index,
+ __entry->hw_context,
+ __entry->extra
+ )
);
TRACE_EVENT(hfi1_wantpiointr,
- TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
- TP_ARGS(sc, needint, credit_ctrl),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sc->dd)
- __field(u32, sw_index)
- __field(u32, hw_context)
- __field(u32, needint)
- __field(u64, credit_ctrl)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sc->dd);
- __entry->sw_index = sc->sw_index;
- __entry->hw_context = sc->hw_context;
- __entry->needint = needint;
- __entry->credit_ctrl = credit_ctrl;
- ),
- TP_printk(
- "[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
- __get_str(dev),
- __entry->sw_index,
- __entry->hw_context,
- __entry->needint,
- (unsigned long long)__entry->credit_ctrl
- )
+ TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
+ TP_ARGS(sc, needint, credit_ctrl),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+ __field(u32, sw_index)
+ __field(u32, hw_context)
+ __field(u32, needint)
+ __field(u64, credit_ctrl)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+ __entry->sw_index = sc->sw_index;
+ __entry->hw_context = sc->hw_context;
+ __entry->needint = needint;
+ __entry->credit_ctrl = credit_ctrl;
+ ),
+ TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
+ __get_str(dev),
+ __entry->sw_index,
+ __entry->hw_context,
+ __entry->needint,
+ (unsigned long long)__entry->credit_ctrl
+ )
);
DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
- TP_PROTO(struct hfi1_qp *qp, u32 flags),
- TP_ARGS(qp, flags),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, flags)
- __field(u32, s_flags)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->flags = flags;
- __entry->qpn = qp->ibqp.qp_num;
- __entry->s_flags = qp->s_flags;
- ),
- TP_printk(
- "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
- __get_str(dev),
- __entry->qpn,
- __entry->flags,
- __entry->s_flags
- )
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, flags)
+ __field(u32, s_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->flags = flags;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->flags,
+ __entry->s_flags
+ )
);
DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
- TP_PROTO(struct hfi1_qp *qp, u32 flags),
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
TP_ARGS(qp, flags));
DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
- TP_PROTO(struct hfi1_qp *qp, u32 flags),
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
TP_ARGS(qp, flags));
#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_qphash
-DECLARE_EVENT_CLASS(hfi1_qphash_template,
- TP_PROTO(struct hfi1_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, bucket)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->bucket = bucket;
- ),
- TP_printk(
- "[%s] qpn 0x%x bucket %u",
- __get_str(dev),
- __entry->qpn,
- __entry->bucket
- )
-);
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert,
- TP_PROTO(struct hfi1_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove,
- TP_PROTO(struct hfi1_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
-const char *parse_everbs_hdrs(
- struct trace_seq *p,
- u8 opcode,
- void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
-const char *parse_sdma_flags(
- struct trace_seq *p,
- u64 desc0, u64 desc1);
+const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
-
#define lrh_name(lrh) { HFI1_##lrh, #lrh }
#define show_lnh(lrh) \
__print_symbolic(lrh, \
@@ -420,7 +408,6 @@ __print_symbolic(opcode, \
ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(CNP))
-
#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
#define BTH_PRN \
"op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
@@ -428,124 +415,130 @@ __print_symbolic(opcode, \
#define EHDR_PRN "%s"
DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
- TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- /* LRH */
- __field(u8, vl)
- __field(u8, lver)
- __field(u8, sl)
- __field(u8, lnh)
- __field(u16, dlid)
- __field(u16, len)
- __field(u16, slid)
- /* BTH */
- __field(u8, opcode)
- __field(u8, se)
- __field(u8, m)
- __field(u8, pad)
- __field(u8, tver)
- __field(u16, pkey)
- __field(u8, f)
- __field(u8, b)
- __field(u32, qpn)
- __field(u8, a)
- __field(u32, psn)
- /* extended headers */
- __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
- ),
- TP_fast_assign(
- struct hfi1_other_headers *ohdr;
-
- DD_DEV_ASSIGN(dd);
- /* LRH */
- __entry->vl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
- __entry->lver =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
- __entry->sl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->lnh =
- (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- __entry->dlid =
- be16_to_cpu(hdr->lrh[1]);
- /* allow for larger len */
- __entry->len =
- be16_to_cpu(hdr->lrh[2]);
- __entry->slid =
- be16_to_cpu(hdr->lrh[3]);
- /* BTH */
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- __entry->opcode =
- (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->se =
- (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
- __entry->m =
- (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
- __entry->pad =
- (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- __entry->tver =
- (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
- __entry->pkey =
- be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT)
- & HFI1_FECN_MASK;
- __entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT)
- & HFI1_BECN_MASK;
- __entry->qpn =
- be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
- __entry->a =
- (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
- /* allow for larger PSN */
- __entry->psn =
- be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
- /* extended headers */
- memcpy(
- __get_dynamic_array(ehdrs),
- &ohdr->u,
- ibhdr_exhdr_len(hdr));
- ),
- TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
- __get_str(dev),
- /* LRH */
- __entry->vl,
- __entry->lver,
- __entry->sl,
- __entry->lnh, show_lnh(__entry->lnh),
- __entry->dlid,
- __entry->len,
- __entry->slid,
- /* BTH */
- __entry->opcode, show_ib_opcode(__entry->opcode),
- __entry->se,
- __entry->m,
- __entry->pad,
- __entry->tver,
- __entry->pkey,
- __entry->f,
- __entry->b,
- __entry->qpn,
- __entry->a,
- __entry->psn,
- /* extended headers */
- __parse_ib_ehdrs(
- __entry->opcode,
- (void *)__get_dynamic_array(ehdrs))
- )
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ /* LRH */
+ __field(u8, vl)
+ __field(u8, lver)
+ __field(u8, sl)
+ __field(u8, lnh)
+ __field(u16, dlid)
+ __field(u16, len)
+ __field(u16, slid)
+ /* BTH */
+ __field(u8, opcode)
+ __field(u8, se)
+ __field(u8, m)
+ __field(u8, pad)
+ __field(u8, tver)
+ __field(u16, pkey)
+ __field(u8, f)
+ __field(u8, b)
+ __field(u32, qpn)
+ __field(u8, a)
+ __field(u32, psn)
+ /* extended headers */
+ __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+ ),
+ TP_fast_assign(
+ struct hfi1_other_headers *ohdr;
+
+ DD_DEV_ASSIGN(dd);
+ /* LRH */
+ __entry->vl =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
+ __entry->lver =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
+ __entry->sl =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ __entry->lnh =
+ (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+ __entry->dlid =
+ be16_to_cpu(hdr->lrh[1]);
+ /* allow for larger len */
+ __entry->len =
+ be16_to_cpu(hdr->lrh[2]);
+ __entry->slid =
+ be16_to_cpu(hdr->lrh[3]);
+ /* BTH */
+ if (__entry->lnh == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ __entry->opcode =
+ (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+ __entry->se =
+ (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
+ __entry->m =
+ (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
+ __entry->pad =
+ (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ __entry->tver =
+ (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
+ __entry->pkey =
+ be32_to_cpu(ohdr->bth[0]) & 0xffff;
+ __entry->f =
+ (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
+ HFI1_FECN_MASK;
+ __entry->b =
+ (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
+ HFI1_BECN_MASK;
+ __entry->qpn =
+ be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+ __entry->a =
+ (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
+ /* allow for larger PSN */
+ __entry->psn =
+ be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+ /* extended headers */
+ memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+ ibhdr_exhdr_len(hdr));
+ ),
+ TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ /* LRH */
+ __entry->vl,
+ __entry->lver,
+ __entry->sl,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->dlid,
+ __entry->len,
+ __entry->slid,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->m,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->f,
+ __entry->b,
+ __entry->qpn,
+ __entry->a,
+ __entry->psn,
+ /* extended headers */
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
);
DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
TP_ARGS(dd, hdr));
-DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
+DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
TP_ARGS(dd, hdr));
@@ -556,15 +549,14 @@ DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_snoop
-
TRACE_EVENT(snoop_capture,
- TP_PROTO(struct hfi1_devdata *dd,
- int hdr_len,
- struct hfi1_ib_header *hdr,
- int data_len,
- void *data),
- TP_ARGS(dd, hdr_len, hdr, data_len, data),
- TP_STRUCT__entry(
+ TP_PROTO(struct hfi1_devdata *dd,
+ int hdr_len,
+ struct hfi1_ib_header *hdr,
+ int data_len,
+ void *data),
+ TP_ARGS(dd, hdr_len, hdr, data_len, data),
+ TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
__field(u16, slid)
__field(u16, dlid)
@@ -577,8 +569,8 @@ TRACE_EVENT(snoop_capture,
__field(u8, lnh)
__dynamic_array(u8, raw_hdr, hdr_len)
__dynamic_array(u8, raw_pkt, data_len)
- ),
- TP_fast_assign(
+ ),
+ TP_fast_assign(
struct hfi1_other_headers *ohdr;
__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -589,7 +581,7 @@ TRACE_EVENT(snoop_capture,
DD_DEV_ASSIGN(dd);
__entry->slid = be16_to_cpu(hdr->lrh[3]);
__entry->dlid = be16_to_cpu(hdr->lrh[1]);
- __entry->qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+ __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
__entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
__entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
@@ -597,8 +589,9 @@ TRACE_EVENT(snoop_capture,
__entry->data_len = data_len;
memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
memcpy(__get_dynamic_array(raw_pkt), data, data_len);
- ),
- TP_printk("[%s] " SNOOP_PRN,
+ ),
+ TP_printk(
+ "[%s] " SNOOP_PRN,
__get_str(dev),
__entry->slid,
__entry->dlid,
@@ -609,7 +602,7 @@ TRACE_EVENT(snoop_capture,
__entry->pkey,
__entry->hdr_len,
__entry->data_len
- )
+ )
);
#undef TRACE_SYSTEM
@@ -621,41 +614,39 @@ TRACE_EVENT(snoop_capture,
TRACE_EVENT(hfi1_uctxtdata,
TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
TP_ARGS(dd, uctxt),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(unsigned, ctxt)
- __field(u32, credits)
- __field(u64, hw_free)
- __field(u64, piobase)
- __field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
- __field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = uctxt->ctxt;
- __entry->credits = uctxt->sc->credits;
- __entry->hw_free = (u64)uctxt->sc->hw_free;
- __entry->piobase = (u64)uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
- __entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys = uctxt->egrbufs.rcvtids[0].phys;
- ),
- TP_printk(
- "[%s] ctxt %u " UCTXT_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->credits,
- __entry->hw_free,
- __entry->piobase,
- __entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
- __entry->eager_cnt,
- __entry->rcvegr_phys
- )
- );
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(unsigned, ctxt)
+ __field(u32, credits)
+ __field(u64, hw_free)
+ __field(u64, piobase)
+ __field(u16, rcvhdrq_cnt)
+ __field(u64, rcvhdrq_phys)
+ __field(u32, eager_cnt)
+ __field(u64, rcvegr_phys)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = uctxt->ctxt;
+ __entry->credits = uctxt->sc->credits;
+ __entry->hw_free = (u64)uctxt->sc->hw_free;
+ __entry->piobase = (u64)uctxt->sc->base_addr;
+ __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->eager_cnt = uctxt->egrbufs.alloced;
+ __entry->rcvegr_phys =
+ uctxt->egrbufs.rcvtids[0].phys;
+ ),
+ TP_printk("[%s] ctxt %u " UCTXT_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->credits,
+ __entry->hw_free,
+ __entry->piobase,
+ __entry->rcvhdrq_cnt,
+ __entry->rcvhdrq_phys,
+ __entry->eager_cnt,
+ __entry->rcvegr_phys
+ )
+);
#define CINFO_FMT \
"egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
@@ -663,38 +654,35 @@ TRACE_EVENT(hfi1_ctxt_info,
TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt,
struct hfi1_ctxt_info cinfo),
TP_ARGS(dd, ctxt, subctxt, cinfo),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(unsigned, ctxt)
- __field(unsigned, subctxt)
- __field(u16, egrtids)
- __field(u16, rcvhdrq_cnt)
- __field(u16, rcvhdrq_size)
- __field(u16, sdma_ring_size)
- __field(u32, rcvegr_size)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->egrtids = cinfo.egrtids;
- __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
- __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
- __entry->sdma_ring_size = cinfo.sdma_ring_size;
- __entry->rcvegr_size = cinfo.rcvegr_size;
- ),
- TP_printk(
- "[%s] ctxt %u:%u " CINFO_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->egrtids,
- __entry->rcvegr_size,
- __entry->rcvhdrq_cnt,
- __entry->rcvhdrq_size,
- __entry->sdma_ring_size
- )
- );
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(unsigned, ctxt)
+ __field(unsigned, subctxt)
+ __field(u16, egrtids)
+ __field(u16, rcvhdrq_cnt)
+ __field(u16, rcvhdrq_size)
+ __field(u16, sdma_ring_size)
+ __field(u32, rcvegr_size)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->egrtids = cinfo.egrtids;
+ __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
+ __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
+ __entry->sdma_ring_size = cinfo.sdma_ring_size;
+ __entry->rcvegr_size = cinfo.rcvegr_size;
+ ),
+ TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->egrtids,
+ __entry->rcvegr_size,
+ __entry->rcvhdrq_cnt,
+ __entry->rcvhdrq_size,
+ __entry->sdma_ring_size
+ )
+);
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_sma
@@ -708,52 +696,48 @@ TRACE_EVENT(hfi1_ctxt_info,
)
DECLARE_EVENT_CLASS(hfi1_bct_template,
- TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
- TP_ARGS(dd, bc),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __dynamic_array(u8, bct, sizeof(*bc))
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- memcpy(
- __get_dynamic_array(bct),
- bc,
- sizeof(*bc));
- ),
- TP_printk(BCT_FORMAT,
- BCT(overall_shared_limit),
-
- BCT(vl[0].dedicated),
- BCT(vl[0].shared),
-
- BCT(vl[1].dedicated),
- BCT(vl[1].shared),
-
- BCT(vl[2].dedicated),
- BCT(vl[2].shared),
-
- BCT(vl[3].dedicated),
- BCT(vl[3].shared),
-
- BCT(vl[4].dedicated),
- BCT(vl[4].shared),
-
- BCT(vl[5].dedicated),
- BCT(vl[5].shared),
-
- BCT(vl[6].dedicated),
- BCT(vl[6].shared),
-
- BCT(vl[7].dedicated),
- BCT(vl[7].shared),
-
- BCT(vl[15].dedicated),
- BCT(vl[15].shared)
- )
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct buffer_control *bc),
+ TP_ARGS(dd, bc),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __dynamic_array(u8, bct, sizeof(*bc))
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ memcpy(__get_dynamic_array(bct), bc,
+ sizeof(*bc));
+ ),
+ TP_printk(BCT_FORMAT,
+ BCT(overall_shared_limit),
+
+ BCT(vl[0].dedicated),
+ BCT(vl[0].shared),
+
+ BCT(vl[1].dedicated),
+ BCT(vl[1].shared),
+
+ BCT(vl[2].dedicated),
+ BCT(vl[2].shared),
+
+ BCT(vl[3].dedicated),
+ BCT(vl[3].shared),
+
+ BCT(vl[4].dedicated),
+ BCT(vl[4].shared),
+
+ BCT(vl[5].dedicated),
+ BCT(vl[5].shared),
+
+ BCT(vl[6].dedicated),
+ BCT(vl[6].shared),
+
+ BCT(vl[7].dedicated),
+ BCT(vl[7].shared),
+
+ BCT(vl[15].dedicated),
+ BCT(vl[15].shared)
+ )
);
-
DEFINE_EVENT(hfi1_bct_template, bct_set,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
@@ -766,252 +750,209 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
#define TRACE_SYSTEM hfi1_sdma
TRACE_EVENT(hfi1_sdma_descriptor,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 desc0,
- u64 desc1,
- u16 e,
- void *descp),
+ TP_PROTO(struct sdma_engine *sde,
+ u64 desc0,
+ u64 desc1,
+ u16 e,
+ void *descp),
TP_ARGS(sde, desc0, desc1, e, descp),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __field(void *, descp)
- __field(u64, desc0)
- __field(u64, desc1)
- __field(u16, e)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __entry->desc0 = desc0;
- __entry->desc1 = desc1;
- __entry->idx = sde->this_idx;
- __entry->descp = descp;
- __entry->e = e;
- ),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(void *, descp)
+ __field(u64, desc0)
+ __field(u64, desc1)
+ __field(u16, e)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->desc0 = desc0;
+ __entry->desc1 = desc1;
+ __entry->idx = sde->this_idx;
+ __entry->descp = descp;
+ __entry->e = e;
+ ),
TP_printk(
- "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
- __get_str(dev),
- __entry->idx,
- __parse_sdma_flags(__entry->desc0, __entry->desc1),
- (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT)
- & SDMA_DESC0_PHY_ADDR_MASK,
- (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT)
- & SDMA_DESC1_GENERATION_MASK),
- (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT)
- & SDMA_DESC0_BYTE_COUNT_MASK),
- __entry->desc0,
- __entry->desc1,
- __entry->descp,
- __entry->e
- )
+ "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
+ __get_str(dev),
+ __entry->idx,
+ __parse_sdma_flags(__entry->desc0, __entry->desc1),
+ (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
+ SDMA_DESC0_PHY_ADDR_MASK,
+ (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
+ SDMA_DESC1_GENERATION_MASK),
+ (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
+ SDMA_DESC0_BYTE_COUNT_MASK),
+ __entry->desc0,
+ __entry->desc1,
+ __entry->descp,
+ __entry->e
+ )
);
TRACE_EVENT(hfi1_sdma_engine_select,
- TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
- TP_ARGS(dd, sel, vl, idx),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u32, sel)
- __field(u8, vl)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->sel = sel;
- __entry->vl = vl;
- __entry->idx = idx;
- ),
- TP_printk(
- "[%s] selecting SDE %u sel 0x%x vl %u",
- __get_str(dev),
- __entry->idx,
- __entry->sel,
- __entry->vl
- )
+ TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
+ TP_ARGS(dd, sel, vl, idx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u32, sel)
+ __field(u8, vl)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->sel = sel;
+ __entry->vl = vl;
+ __entry->idx = idx;
+ ),
+ TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sel,
+ __entry->vl
+ )
);
DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 status
- ),
- TP_ARGS(sde, status),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __field(u64, status)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __entry->status = status;
- __entry->idx = sde->this_idx;
- ),
- TP_printk(
- "[%s] SDE(%u) status %llx",
- __get_str(dev),
- __entry->idx,
- (unsigned long long)__entry->status
- )
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, status)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->status = status;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk("[%s] SDE(%u) status %llx",
+ __get_str(dev),
+ __entry->idx,
+ (unsigned long long)__entry->status
+ )
);
DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 status
- ),
- TP_ARGS(sde, status)
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status)
);
DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 status
- ),
- TP_ARGS(sde, status)
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status)
);
DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
- TP_PROTO(
- struct sdma_engine *sde,
- int aidx
- ),
- TP_ARGS(sde, aidx),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __field(int, aidx)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __entry->idx = sde->this_idx;
- __entry->aidx = aidx;
- ),
- TP_printk(
- "[%s] SDE(%u) aidx %d",
- __get_str(dev),
- __entry->idx,
- __entry->aidx
- )
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(int, aidx)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->idx = sde->this_idx;
+ __entry->aidx = aidx;
+ ),
+ TP_printk("[%s] SDE(%u) aidx %d",
+ __get_str(dev),
+ __entry->idx,
+ __entry->aidx
+ )
);
DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
- TP_PROTO(
- struct sdma_engine *sde,
- int aidx
- ),
+ TP_PROTO(struct sdma_engine *sde, int aidx),
TP_ARGS(sde, aidx));
DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
- TP_PROTO(
- struct sdma_engine *sde,
- int aidx
- ),
+ TP_PROTO(struct sdma_engine *sde, int aidx),
TP_ARGS(sde, aidx));
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
TRACE_EVENT(hfi1_sdma_progress,
- TP_PROTO(
- struct sdma_engine *sde,
- u16 hwhead,
- u16 swhead,
- struct sdma_txreq *txp
- ),
- TP_ARGS(sde, hwhead, swhead, txp),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __field(u64, sn)
- __field(u16, hwhead)
- __field(u16, swhead)
- __field(u16, txnext)
- __field(u16, tx_tail)
- __field(u16, tx_head)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __entry->hwhead = hwhead;
- __entry->swhead = swhead;
- __entry->tx_tail = sde->tx_tail;
- __entry->tx_head = sde->tx_head;
- __entry->txnext = txp ? txp->next_descq_idx : ~0;
- __entry->idx = sde->this_idx;
- __entry->sn = txp ? txp->sn : ~0;
- ),
- TP_printk(
- "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
- __get_str(dev),
- __entry->idx,
- __entry->sn,
- __entry->hwhead,
- __entry->swhead,
- __entry->txnext,
- __entry->tx_head,
- __entry->tx_tail
- )
+ TP_PROTO(struct sdma_engine *sde,
+ u16 hwhead,
+ u16 swhead,
+ struct sdma_txreq *txp
+ ),
+ TP_ARGS(sde, hwhead, swhead, txp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, sn)
+ __field(u16, hwhead)
+ __field(u16, swhead)
+ __field(u16, txnext)
+ __field(u16, tx_tail)
+ __field(u16, tx_head)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->hwhead = hwhead;
+ __entry->swhead = swhead;
+ __entry->tx_tail = sde->tx_tail;
+ __entry->tx_head = sde->tx_head;
+ __entry->txnext = txp ? txp->next_descq_idx : ~0;
+ __entry->idx = sde->this_idx;
+ __entry->sn = txp ? txp->sn : ~0;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sn,
+ __entry->hwhead,
+ __entry->swhead,
+ __entry->txnext,
+ __entry->tx_head,
+ __entry->tx_tail
+ )
);
#else
TRACE_EVENT(hfi1_sdma_progress,
- TP_PROTO(
- struct sdma_engine *sde,
- u16 hwhead,
- u16 swhead,
- struct sdma_txreq *txp
+ TP_PROTO(struct sdma_engine *sde,
+ u16 hwhead, u16 swhead,
+ struct sdma_txreq *txp
),
TP_ARGS(sde, hwhead, swhead, txp),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __field(u16, hwhead)
- __field(u16, swhead)
- __field(u16, txnext)
- __field(u16, tx_tail)
- __field(u16, tx_head)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __entry->hwhead = hwhead;
- __entry->swhead = swhead;
- __entry->tx_tail = sde->tx_tail;
- __entry->tx_head = sde->tx_head;
- __entry->txnext = txp ? txp->next_descq_idx : ~0;
- __entry->idx = sde->this_idx;
- ),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u16, hwhead)
+ __field(u16, swhead)
+ __field(u16, txnext)
+ __field(u16, tx_tail)
+ __field(u16, tx_head)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->hwhead = hwhead;
+ __entry->swhead = swhead;
+ __entry->tx_tail = sde->tx_tail;
+ __entry->tx_head = sde->tx_head;
+ __entry->txnext = txp ? txp->next_descq_idx : ~0;
+ __entry->idx = sde->this_idx;
+ ),
TP_printk(
- "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
- __get_str(dev),
- __entry->idx,
- __entry->hwhead,
- __entry->swhead,
- __entry->txnext,
- __entry->tx_head,
- __entry->tx_tail
- )
+ "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->hwhead,
+ __entry->swhead,
+ __entry->txnext,
+ __entry->tx_head,
+ __entry->tx_tail
+ )
);
#endif
DECLARE_EVENT_CLASS(hfi1_sdma_sn,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 sn
- ),
- TP_ARGS(sde, sn),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __field(u64, sn)
- __field(u8, idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __entry->sn = sn;
- __entry->idx = sde->this_idx;
- ),
- TP_printk(
- "[%s] SDE(%u) sn %llu",
- __get_str(dev),
- __entry->idx,
- __entry->sn
- )
+ TP_PROTO(struct sdma_engine *sde, u64 sn),
+ TP_ARGS(sde, sn),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, sn)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->sn = sn;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk("[%s] SDE(%u) sn %llu",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sn
+ )
);
DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
@@ -1023,10 +964,7 @@ DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
);
DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 sn
- ),
+ TP_PROTO(struct sdma_engine *sde, u64 sn),
TP_ARGS(sde, sn)
);
@@ -1227,88 +1165,85 @@ TRACE_EVENT(hfi1_sdma_user_header_ahg,
);
TRACE_EVENT(hfi1_sdma_state,
- TP_PROTO(
- struct sdma_engine *sde,
- const char *cstate,
- const char *nstate
- ),
- TP_ARGS(sde, cstate, nstate),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(sde->dd)
- __string(curstate, cstate)
- __string(newstate, nstate)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(sde->dd);
- __assign_str(curstate, cstate);
- __assign_str(newstate, nstate);
- ),
+ TP_PROTO(struct sdma_engine *sde,
+ const char *cstate,
+ const char *nstate
+ ),
+ TP_ARGS(sde, cstate, nstate),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __string(curstate, cstate)
+ __string(newstate, nstate)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __assign_str(curstate, cstate);
+ __assign_str(newstate, nstate);
+ ),
TP_printk("[%s] current state %s new state %s",
- __get_str(dev),
- __get_str(curstate),
- __get_str(newstate)
- )
+ __get_str(dev),
+ __get_str(curstate),
+ __get_str(newstate)
+ )
);
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_rc
DECLARE_EVENT_CLASS(hfi1_rc_template,
- TP_PROTO(struct hfi1_qp *qp, u32 psn),
- TP_ARGS(qp, psn),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, s_flags)
- __field(u32, psn)
- __field(u32, s_psn)
- __field(u32, s_next_psn)
- __field(u32, s_sending_psn)
- __field(u32, s_sending_hpsn)
- __field(u32, r_psn)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->s_flags = qp->s_flags;
- __entry->psn = psn;
- __entry->s_psn = qp->s_psn;
- __entry->s_next_psn = qp->s_next_psn;
- __entry->s_sending_psn = qp->s_sending_psn;
- __entry->s_sending_hpsn = qp->s_sending_hpsn;
- __entry->r_psn = qp->r_psn;
- ),
- TP_printk(
- "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
- __get_str(dev),
- __entry->qpn,
- __entry->s_flags,
- __entry->psn,
- __entry->s_psn,
- __entry->s_next_psn,
- __entry->s_sending_psn,
- __entry->s_sending_hpsn,
- __entry->r_psn
- )
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u32, psn)
+ __field(u32, s_psn)
+ __field(u32, s_next_psn)
+ __field(u32, s_sending_psn)
+ __field(u32, s_sending_hpsn)
+ __field(u32, r_psn)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->psn = psn;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_next_psn = qp->s_next_psn;
+ __entry->s_sending_psn = qp->s_sending_psn;
+ __entry->s_sending_hpsn = qp->s_sending_hpsn;
+ __entry->r_psn = qp->r_psn;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->psn,
+ __entry->s_psn,
+ __entry->s_next_psn,
+ __entry->s_sending_psn,
+ __entry->s_sending_hpsn,
+ __entry->r_psn
+ )
);
DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete,
- TP_PROTO(struct hfi1_qp *qp, u32 psn),
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack,
- TP_PROTO(struct hfi1_qp *qp, u32 psn),
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout,
- TP_PROTO(struct hfi1_qp *qp, u32 psn),
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
- TP_PROTO(struct hfi1_qp *qp, u32 psn),
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
@@ -1316,21 +1251,20 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
#define TRACE_SYSTEM hfi1_misc
TRACE_EVENT(hfi1_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
- int src),
- TP_ARGS(dd, is_entry, src),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __array(char, buf, 64)
- __field(int, src)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd)
- is_entry->is_name(__entry->buf, 64, src - is_entry->start);
- __entry->src = src;
- ),
- TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
- __entry->src)
+ TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
+ int src),
+ TP_ARGS(dd, is_entry, src),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __array(char, buf, 64)
+ __field(int, src)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd)
+ is_entry->is_name(__entry->buf, 64,
+ src - is_entry->start);
+ __entry->src = src;
+ ),
+ TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
+ __entry->src)
);
/*
@@ -1345,21 +1279,21 @@ TRACE_EVENT(hfi1_interrupt,
#define MAX_MSG_LEN 512
DECLARE_EVENT_CLASS(hfi1_trace_template,
- TP_PROTO(const char *function, struct va_format *vaf),
- TP_ARGS(function, vaf),
- TP_STRUCT__entry(
- __string(function, function)
- __dynamic_array(char, msg, MAX_MSG_LEN)
- ),
- TP_fast_assign(
- __assign_str(function, function);
- WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >= MAX_MSG_LEN);
- ),
- TP_printk("(%s) %s",
- __get_str(function),
- __get_str(msg))
+ TP_PROTO(const char *function, struct va_format *vaf),
+ TP_ARGS(function, vaf),
+ TP_STRUCT__entry(__string(function, function)
+ __dynamic_array(char, msg, MAX_MSG_LEN)
+ ),
+ TP_fast_assign(__assign_str(function, function);
+ WARN_ON_ONCE(vsnprintf
+ (__get_dynamic_array(msg),
+ MAX_MSG_LEN, vaf->fmt,
+ *vaf->va) >=
+ MAX_MSG_LEN);
+ ),
+ TP_printk("(%s) %s",
+ __get_str(function),
+ __get_str(msg))
);
/*
@@ -1406,6 +1340,7 @@ __hfi1_trace_def(DC8051);
__hfi1_trace_def(FIRMWARE);
__hfi1_trace_def(RCVCTRL);
__hfi1_trace_def(TID);
+__hfi1_trace_def(MMU);
#define hfi1_cdbg(which, fmt, ...) \
__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c
index ea54fd2700ad..e82e52a63d35 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/staging/rdma/hfi1/twsi.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -119,9 +116,9 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
* Allow for slow slaves by simple
* delay for falling edge, sampling on rise.
*/
- if (!bit)
+ if (!bit) {
udelay(2);
- else {
+ } else {
int rise_usec;
for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
@@ -131,11 +128,24 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
}
if (rise_usec <= 0)
dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
- SCL_WAIT_USEC);
+ SCL_WAIT_USEC);
}
i2c_wait_for_writes(dd, target);
}
+static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait)
+{
+ u32 read_val, mask;
+
+ mask = QSFP_HFI0_I2CCLK;
+ /* SCL is meant to be bare-drain, so never set "OUT", just DIR */
+ hfi1_gpio_mod(dd, target, 0, 0, mask);
+ read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
+ if (wait)
+ i2c_wait_for_writes(dd, target);
+ return (read_val & mask) >> GPIO_SCL_NUM;
+}
+
static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
{
u32 mask;
@@ -274,13 +284,12 @@ static void stop_cmd(struct hfi1_devdata *dd, u32 target)
/**
* hfi1_twsi_reset - reset I2C communication
* @dd: the hfi1_ib device
+ * returns 0 if ok, -EIO on error
*/
-
int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
{
int clock_cycles_left = 9;
- int was_high = 0;
- u32 pins, mask;
+ u32 mask;
/* Both SCL and SDA should be high. If not, there
* is something wrong.
@@ -294,43 +303,23 @@ int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
*/
hfi1_gpio_mod(dd, target, 0, 0, mask);
- /*
- * Clock nine times to get all listeners into a sane state.
- * If SDA does not go high at any point, we are wedged.
- * One vendor recommends then issuing START followed by STOP.
- * we cannot use our "normal" functions to do that, because
- * if SCL drops between them, another vendor's part will
- * wedge, dropping SDA and keeping it low forever, at the end of
- * the next transaction (even if it was not the device addressed).
- * So our START and STOP take place with SCL held high.
+ /* Check if SCL is low, if it is low then we have a slave device
+ * misbehaving and there is not much we can do.
+ */
+ if (!scl_in(dd, target, 0))
+ return -EIO;
+
+ /* Check if SDA is low, if it is low then we have to clock SDA
+ * up to 9 times for the device to release the bus
*/
while (clock_cycles_left--) {
+ if (sda_in(dd, target, 0))
+ return 0;
scl_out(dd, target, 0);
scl_out(dd, target, 1);
- /* Note if SDA is high, but keep clocking to sync slave */
- was_high |= sda_in(dd, target, 0);
- }
-
- if (was_high) {
- /*
- * We saw a high, which we hope means the slave is sync'd.
- * Issue START, STOP, pause for T_BUF.
- */
-
- pins = hfi1_gpio_mod(dd, target, 0, 0, 0);
- if ((pins & mask) != mask)
- dd_dev_err(dd, "GPIO pins not at rest: %d\n",
- pins & mask);
- /* Drop SDA to issue START */
- udelay(1); /* Guarantee .6 uSec setup */
- sda_out(dd, target, 0);
- udelay(1); /* Guarantee .6 uSec hold */
- /* At this point, SCL is high, SDA low. Raise SDA for STOP */
- sda_out(dd, target, 1);
- udelay(TWSI_BUF_WAIT_USEC);
}
- return !was_high;
+ return -EIO;
}
#define HFI1_TWSI_START 0x100
@@ -365,17 +354,25 @@ static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags)
* HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
* which responded to all TWSI device codes, interpreting them as
* address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
* the "register" or "offset" within the device from which data should
* be read.
*/
int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
void *buffer, int len)
{
- int ret;
u8 *bp = buffer;
+ int ret = 1;
+ int i;
+ int offset_size;
+
+ /* obtain the offset size, strip it from the device address */
+ offset_size = (dev >> 8) & 0xff;
+ dev &= 0xff;
- ret = 1;
+ /* allow at most a 2 byte offset */
+ if (offset_size > 2)
+ goto bail;
if (dev == HFI1_TWSI_NO_DEV) {
/* legacy not-really-I2C */
@@ -383,34 +380,29 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
} else {
/* Actual I2C */
- ret = twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START);
- if (ret) {
- stop_cmd(dd, target);
- ret = 1;
- goto bail;
- }
- /*
- * SFF spec claims we do _not_ stop after the addr
- * but simply issue a start with the "read" dev-addr.
- * Since we are implicitly waiting for ACK here,
- * we need t_buf (nominally 20uSec) before that start,
- * and cannot rely on the delay built in to the STOP
- */
- ret = twsi_wr(dd, target, addr, 0);
- udelay(TWSI_BUF_WAIT_USEC);
+ if (offset_size) {
+ ret = twsi_wr(dd, target,
+ dev | WRITE_CMD, HFI1_TWSI_START);
+ if (ret) {
+ stop_cmd(dd, target);
+ goto bail;
+ }
- if (ret) {
- dd_dev_err(dd,
- "Failed to write interface read addr %02X\n",
- addr);
- ret = 1;
- goto bail;
+ for (i = 0; i < offset_size; i++) {
+ ret = twsi_wr(dd, target,
+ (addr >> (i * 8)) & 0xff, 0);
+ udelay(TWSI_BUF_WAIT_USEC);
+ if (ret) {
+ dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+ i, addr);
+ goto bail;
+ }
+ }
}
ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
}
if (ret) {
stop_cmd(dd, target);
- ret = 1;
goto bail;
}
@@ -442,76 +434,55 @@ bail:
* HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
* which responded to all TWSI device codes, interpreting them as
* address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
* the "register" or "offset" within the device to which data should
* be written.
*/
int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
const void *buffer, int len)
{
- int sub_len;
const u8 *bp = buffer;
- int max_wait_time, i;
int ret = 1;
+ int i;
+ int offset_size;
- while (len > 0) {
- if (dev == HFI1_TWSI_NO_DEV) {
- if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
- HFI1_TWSI_START)) {
- goto failed_write;
- }
- } else {
- /* Real I2C */
- if (twsi_wr(dd, target,
- dev | WRITE_CMD, HFI1_TWSI_START))
- goto failed_write;
- ret = twsi_wr(dd, target, addr, 0);
- if (ret) {
- dd_dev_err(dd,
- "Failed to write interface write addr %02X\n",
- addr);
- goto failed_write;
- }
- }
-
- sub_len = min(len, 4);
- addr += sub_len;
- len -= sub_len;
+ /* obtain the offset size, strip it from the device address */
+ offset_size = (dev >> 8) & 0xff;
+ dev &= 0xff;
- for (i = 0; i < sub_len; i++)
- if (twsi_wr(dd, target, *bp++, 0))
- goto failed_write;
+ /* allow at most a 2 byte offset */
+ if (offset_size > 2)
+ goto bail;
- stop_cmd(dd, target);
+ if (dev == HFI1_TWSI_NO_DEV) {
+ if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
+ HFI1_TWSI_START)) {
+ goto failed_write;
+ }
+ } else {
+ /* Real I2C */
+ if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START))
+ goto failed_write;
+ }
- /*
- * Wait for write complete by waiting for a successful
- * read (the chip replies with a zero after the write
- * cmd completes, and before it writes to the eeprom.
- * The startcmd for the read will fail the ack until
- * the writes have completed. We do this inline to avoid
- * the debug prints that are in the real read routine
- * if the startcmd fails.
- * We also use the proper device address, so it doesn't matter
- * whether we have real eeprom_dev. Legacy likes any address.
- */
- max_wait_time = 100;
- while (twsi_wr(dd, target,
- dev | READ_CMD, HFI1_TWSI_START)) {
- stop_cmd(dd, target);
- if (!--max_wait_time)
- goto failed_write;
+ for (i = 0; i < offset_size; i++) {
+ ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0);
+ udelay(TWSI_BUF_WAIT_USEC);
+ if (ret) {
+ dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+ i, addr);
+ goto bail;
}
- /* now read (and ignore) the resulting byte */
- rd_byte(dd, target, 1);
}
+ for (i = 0; i < len; i++)
+ if (twsi_wr(dd, target, *bp++, 0))
+ goto failed_write;
+
ret = 0;
- goto bail;
failed_write:
stop_cmd(dd, target);
- ret = 1;
bail:
return ret;
diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h
index 5907e029613d..5b8a5b5e7eae 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/staging/rdma/hfi1/twsi.h
@@ -1,14 +1,13 @@
#ifndef _TWSI_H
#define _TWSI_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -54,8 +51,9 @@
struct hfi1_devdata;
-/* Bit position of SDA pin in ASIC_QSFP* registers */
+/* Bit position of SDA/SCL pins in ASIC_QSFP* registers */
#define GPIO_SDA_NUM 1
+#define GPIO_SCL_NUM 0
/* these functions must be called with qsfp_lock held */
int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
@@ -64,5 +62,4 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
const void *buffer, int len);
-
#endif /* _TWSI_H */
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index 4f2a7889a852..df773d433297 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -49,71 +46,82 @@
*/
#include "hfi.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
#include "qp.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
+/* only opcode mask for adaptive pio */
+const u32 uc_only_opcode =
+ BIT(OP(SEND_ONLY) & 0x1f) |
+ BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+ BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+ BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
*
+ * Assume s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
-int hfi1_make_uc_req(struct hfi1_qp *qp)
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_other_headers *ohdr;
- struct hfi1_swqe *wqe;
- unsigned long flags;
+ struct rvt_swqe *wqe;
u32 hwords = 5;
u32 bth0 = 0;
u32 len;
u32 pmtu = qp->pmtu;
- int ret = 0;
int middle = 0;
- spin_lock_irqsave(&qp->s_lock, flags);
+ ps->s_txreq = get_txreq(ps->dev, qp);
+ if (IS_ERR(ps->s_txreq))
+ goto bail_no_tx;
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
- if (atomic_read(&qp->s_iowait.sdma_busy)) {
- qp->s_flags |= HFI1_S_WAIT_DMA;
+ if (iowait_sdma_pending(&priv->s_iowait)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
clear_ahg(qp);
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
- goto done;
+ goto done_free_tx;
}
- ohdr = &qp->s_hdr->ibh.u.oth;
+ ohdr = &ps->s_txreq->phdr.hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr->ibh.u.l.oth;
+ ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Get the next send request. */
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
- if (!(ib_hfi1_state_ops[qp->state] &
- HFI1_PROCESS_NEXT_SEND_OK))
+ if (!(ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
- if (qp->s_cur == qp->s_head) {
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_cur == ACCESS_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
}
/*
* Start a new request.
*/
- wqe->psn = qp->s_next_psn;
- qp->s_psn = qp->s_next_psn;
+ qp->s_psn = wqe->psn;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -128,9 +136,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
len = pmtu;
break;
}
- if (wqe->wr.opcode == IB_WR_SEND)
+ if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_ONLY);
- else {
+ } else {
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
@@ -157,9 +165,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
len = pmtu;
break;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
qp->s_state = OP(RDMA_WRITE_ONLY);
- else {
+ } else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
@@ -188,9 +196,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
- if (wqe->wr.opcode == IB_WR_SEND)
+ if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_LAST);
- else {
+ } else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -213,9 +221,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
qp->s_state = OP(RDMA_WRITE_LAST);
- else {
+ } else {
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
@@ -231,19 +239,28 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
}
qp->s_len -= len;
qp->s_hdrwords = hwords;
+ ps->s_txreq->sde = priv->s_sde;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
- mask_psn(qp->s_next_psn++), middle);
-done:
- ret = 1;
- goto unlock;
+ mask_psn(qp->s_psn++), middle, ps);
+ /* pbc */
+ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+ return 1;
+
+done_free_tx:
+ hfi1_put_txreq(ps->s_txreq);
+ ps->s_txreq = NULL;
+ return 1;
bail:
- qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- return ret;
+ hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+ ps->s_txreq = NULL;
+ qp->s_flags &= ~RVT_S_BUSY;
+ qp->s_hdrwords = 0;
+ return 0;
}
/**
@@ -266,7 +283,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
- struct hfi1_qp *qp = packet->qp;
+ struct rvt_qp *qp = packet->qp;
struct hfi1_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
@@ -291,14 +308,14 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
u16 rlid = be16_to_cpu(hdr->lrh[3]);
u8 sl, sc5;
- lqpn = bth1 & HFI1_QPN_MASK;
+ lqpn = bth1 & RVT_QPN_MASK;
rqpn = qp->remote_qpn;
sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
sl = ibp->sc_to_sl[sc5];
process_becn(ppd, sl, rlid, lqpn, rqpn,
- IB_CC_SVCTYPE_UC);
+ IB_CC_SVCTYPE_UC);
}
if (bth1 & HFI1_FECN_SMASK) {
@@ -331,10 +348,11 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
inv:
if (qp->r_state == OP(SEND_FIRST) ||
qp->r_state == OP(SEND_MIDDLE)) {
- set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+ set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
- } else
- hfi1_put_ss(&qp->r_sge);
+ } else {
+ rvt_put_ss(&qp->r_sge);
+ }
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
@@ -381,7 +399,7 @@ inv:
goto inv;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
qp_comm_est(qp);
/* OK, process the packet. */
@@ -390,10 +408,10 @@ inv:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
- if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
+ if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
qp->r_sge = qp->s_rdma_read_sge;
- else {
- ret = hfi1_get_rwqe(qp, 0);
+ } else {
+ ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto op_err;
if (!ret)
@@ -417,7 +435,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -442,8 +460,8 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 0);
- hfi1_put_ss(&qp->s_rdma_read_sge);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+ rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -468,9 +486,9 @@ last_imm:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- (ohdr->bth[0] &
- cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
@@ -491,8 +509,8 @@ rdma_first:
int ok;
/* Check rkey */
- ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
- vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+ ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+ vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto drop;
qp->r_sge.num_sge = 1;
@@ -503,9 +521,9 @@ rdma_first:
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
- if (opcode == OP(RDMA_WRITE_ONLY))
+ if (opcode == OP(RDMA_WRITE_ONLY)) {
goto rdma_last;
- else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
+ } else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
@@ -517,7 +535,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -535,10 +553,10 @@ rdma_last_imm:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
- hfi1_put_ss(&qp->s_rdma_read_sge);
- else {
- ret = hfi1_get_rwqe(qp, 1);
+ if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
+ rvt_put_ss(&qp->s_rdma_read_sge);
+ } else {
+ ret = hfi1_rvt_get_rwqe(qp, 1);
if (ret < 0)
goto op_err;
if (!ret)
@@ -546,8 +564,8 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
- hfi1_put_ss(&qp->r_sge);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ rvt_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
@@ -562,8 +580,8 @@ rdma_last:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
- hfi1_put_ss(&qp->r_sge);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ rvt_put_ss(&qp->r_sge);
break;
default:
@@ -575,14 +593,12 @@ rdma_last:
return;
rewind:
- set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+ set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
drop:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
return;
op_err:
hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
- return;
-
}
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index 25e6053c38db..ae8a70f703eb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -53,6 +50,7 @@
#include "hfi.h"
#include "mad.h"
+#include "verbs_txreq.h"
#include "qp.h"
/**
@@ -65,24 +63,25 @@
* Note that the receive interrupt handler may be calling hfi1_ud_rcv()
* while this is being called.
*/
-static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
+static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
- struct hfi1_qp *qp;
+ struct rvt_qp *qp;
struct ib_ah_attr *ah_attr;
unsigned long flags;
- struct hfi1_sge_state ssge;
- struct hfi1_sge *sge;
+ struct rvt_sge_state ssge;
+ struct rvt_sge *sge;
struct ib_wc wc;
u32 length;
enum ib_qp_type sqptype, dqptype;
rcu_read_lock();
- qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+ qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+ swqe->ud_wr.remote_qpn);
if (!qp) {
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
rcu_read_unlock();
return;
}
@@ -93,12 +92,12 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
IB_QPT_UD : qp->ibqp.qp_type;
if (dqptype != sqptype ||
- !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
- ibp->n_pkt_drops++;
+ !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ ibp->rvp.n_pkt_drops++;
goto drop;
}
- ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+ ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -161,35 +160,36 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
/*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_flags & HFI1_R_REUSE_SGE)
- qp->r_flags &= ~HFI1_R_REUSE_SGE;
- else {
+ if (qp->r_flags & RVT_R_REUSE_SGE) {
+ qp->r_flags &= ~RVT_R_REUSE_SGE;
+ } else {
int ret;
- ret = hfi1_get_rwqe(qp, 0);
+ ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0) {
hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
if (qp->ibqp.qp_num == 0)
- ibp->n_vl15_dropped++;
+ ibp->rvp.n_vl15_dropped++;
goto bail_unlock;
}
}
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_flags |= HFI1_R_REUSE_SGE;
- ibp->n_pkt_drops++;
+ qp->r_flags |= RVT_R_REUSE_SGE;
+ ibp->rvp.n_pkt_drops++;
goto bail_unlock;
}
if (ah_attr->ah_flags & IB_AH_GRH) {
hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
- sizeof(struct ib_grh), 1);
+ sizeof(struct ib_grh), 1, 0);
wc.wc_flags |= IB_WC_GRH;
- } else
+ } else {
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ }
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
ssge.num_sge = swqe->wr.num_sge;
@@ -202,7 +202,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+ hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -210,7 +210,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
if (--ssge.num_sge)
*sge = *ssge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= HFI1_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -222,8 +222,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
}
length -= len;
}
- hfi1_put_ss(&qp->r_sge);
- if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+ rvt_put_ss(&qp->r_sge);
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
goto bail_unlock;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -242,14 +242,14 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
- wc.slid = HFI1_PERMISSIVE_LID;
+ wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
wc.sl = ah_attr->sl;
wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
- ibp->n_loop_pkts++;
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ swqe->wr.send_flags & IB_SEND_SOLICITED);
+ ibp->rvp.n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
drop:
@@ -260,47 +260,53 @@ drop:
* hfi1_make_ud_req - construct a UD request packet
* @qp: the QP
*
+ * Assume s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
-int hfi1_make_ud_req(struct hfi1_qp *qp)
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
- struct hfi1_swqe *wqe;
- unsigned long flags;
+ struct rvt_swqe *wqe;
u32 nwords;
u32 extra_bytes;
u32 bth0;
u16 lrh0;
u16 lid;
- int ret = 0;
int next_cur;
u8 sc5;
- spin_lock_irqsave(&qp->s_lock, flags);
+ ps->s_txreq = get_txreq(ps->dev, qp);
+ if (IS_ERR(ps->s_txreq))
+ goto bail_no_tx;
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
- if (atomic_read(&qp->s_iowait.sdma_busy)) {
- qp->s_flags |= HFI1_S_WAIT_DMA;
+ if (iowait_sdma_pending(&priv->s_iowait)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
- goto done;
+ goto done_free_tx;
}
- if (qp->s_cur == qp->s_head)
+ /* see post_one_send() */
+ smp_read_barrier_depends();
+ if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail;
- wqe = get_swqe_ptr(qp, qp->s_cur);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
next_cur = qp->s_cur + 1;
if (next_cur >= qp->s_size)
next_cur = 0;
@@ -308,13 +314,15 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
- ah_attr->dlid == HFI1_PERMISSIVE_LID) {
+ ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
- if (unlikely(!loopback && (lid == ppd->lid ||
- (lid == HFI1_PERMISSIVE_LID &&
- qp->ibqp.qp_type == IB_QPT_GSI)))) {
+ if (unlikely(!loopback &&
+ (lid == ppd->lid ||
+ (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
+ qp->ibqp.qp_type == IB_QPT_GSI)))) {
+ unsigned long flags;
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
@@ -322,16 +330,17 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
* Instead of waiting, we could queue a
* zero length descriptor so we get a callback.
*/
- if (atomic_read(&qp->s_iowait.sdma_busy)) {
- qp->s_flags |= HFI1_S_WAIT_DMA;
+ if (iowait_sdma_pending(&priv->s_iowait)) {
+ qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
qp->s_cur = next_cur;
+ local_irq_save(flags);
spin_unlock_irqrestore(&qp->s_lock, flags);
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
- goto done;
+ goto done_free_tx;
}
}
@@ -353,11 +362,12 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
- qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
- &ah_attr->grh,
- qp->s_hdrwords, nwords);
+ qp->s_hdrwords += hfi1_make_grh(ibp,
+ &ps->s_txreq->phdr.hdr.u.l.grh,
+ &ah_attr->grh,
+ qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
- ohdr = &qp->s_hdr->ibh.u.l.oth;
+ ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/*
* Don't worry about sending to locally attached multicast
* QPs. It is unspecified by the spec. what happens.
@@ -365,37 +375,42 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
} else {
/* Header size in 32-bit words. */
lrh0 = HFI1_LRH_BTH;
- ohdr = &qp->s_hdr->ibh.u.oth;
+ ohdr = &ps->s_txreq->phdr.hdr.u.oth;
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
- } else
+ } else {
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+ }
sc5 = ibp->sl_to_sc[ah_attr->sl];
lrh0 |= (ah_attr->sl & 0xf) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI) {
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
- qp->s_sc = 0xf;
+ priv->s_sc = 0xf;
} else {
lrh0 |= (sc5 & 0xf) << 12;
- qp->s_sc = sc5;
+ priv->s_sc = sc5;
}
- qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
- qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
- qp->s_hdr->ibh.lrh[2] =
+ priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+ ps->s_txreq->sde = priv->s_sde;
+ priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+ ps->s_txreq->psc = priv->s_sendcontext;
+ ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+ ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+ ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE))
- qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
- else {
+ if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+ } else {
lid = ppd->lid;
if (lid) {
lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
- qp->s_hdr->ibh.lrh[3] = cpu_to_be16(lid);
- } else
- qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
+ ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
+ } else {
+ ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+ }
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
@@ -406,7 +421,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
- ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
+ ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
@@ -415,20 +430,28 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
- qp->s_hdr->ahgcount = 0;
- qp->s_hdr->ahgidx = 0;
- qp->s_hdr->tx_flags = 0;
- qp->s_hdr->sde = NULL;
+ priv->s_hdr->ahgcount = 0;
+ priv->s_hdr->ahgidx = 0;
+ priv->s_hdr->tx_flags = 0;
+ priv->s_hdr->sde = NULL;
+ /* pbc */
+ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+
+ return 1;
-done:
- ret = 1;
- goto unlock;
+done_free_tx:
+ hfi1_put_txreq(ps->s_txreq);
+ ps->s_txreq = NULL;
+ return 1;
bail:
- qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- return ret;
+ hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+ ps->s_txreq = NULL;
+ qp->s_flags &= ~RVT_S_BUSY;
+ qp->s_hdrwords = 0;
+ return 0;
}
/*
@@ -476,7 +499,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
return -1;
}
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh)
{
@@ -550,7 +573,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
* opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
*/
static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
- struct hfi1_qp *qp, u16 slid, struct opa_smp *smp)
+ struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -607,7 +630,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
case IB_MGMT_METHOD_TRAP:
case IB_MGMT_METHOD_GET_RESP:
case IB_MGMT_METHOD_REPORT_RESP:
- if (ibp->port_cap_flags & IB_PORT_SM)
+ if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return 0;
if (pkey == FULL_MGMT_P_KEY) {
smp->status |= IB_SMP_UNSUP_METHOD;
@@ -624,7 +647,6 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
return 0;
}
-
/**
* hfi1_ud_rcv - receive an incoming UD packet
* @ibp: the port the packet came in on
@@ -654,7 +676,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
- struct hfi1_qp *qp = packet->qp;
+ struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
bool sc4_bit = has_sc4_bit(packet);
u8 sc;
@@ -663,10 +685,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
struct ib_grh *grh = NULL;
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
- src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+ src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
dlid = be16_to_cpu(hdr->lrh[1]);
- is_mcast = (dlid > HFI1_MULTICAST_LID_BASE) &&
- (dlid != HFI1_PERMISSIVE_LID);
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
bth1 = be32_to_cpu(ohdr->bth[1]);
if (unlikely(bth1 & HFI1_BECN_SMASK)) {
/*
@@ -674,7 +696,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* error path.
*/
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 lqpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+ u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
u8 sl, sc5;
sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -750,7 +772,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
if (mgmt_pkey_idx < 0)
goto drop;
-
}
if (unlikely(qkey != qp->qkey)) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
@@ -788,7 +809,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
if (mgmt_pkey_idx < 0)
goto drop;
-
}
if (qp->ibqp.qp_num > 1 &&
@@ -799,8 +819,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
- } else
+ } else {
goto drop;
+ }
/*
* A GRH is expected to precede the data even if not
@@ -811,36 +832,38 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
/*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_flags & HFI1_R_REUSE_SGE)
- qp->r_flags &= ~HFI1_R_REUSE_SGE;
- else {
+ if (qp->r_flags & RVT_R_REUSE_SGE) {
+ qp->r_flags &= ~RVT_R_REUSE_SGE;
+ } else {
int ret;
- ret = hfi1_get_rwqe(qp, 0);
+ ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0) {
hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
if (qp->ibqp.qp_num == 0)
- ibp->n_vl15_dropped++;
+ ibp->rvp.n_vl15_dropped++;
return;
}
}
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_flags |= HFI1_R_REUSE_SGE;
+ qp->r_flags |= RVT_R_REUSE_SGE;
goto drop;
}
if (has_grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
- sizeof(struct ib_grh), 1);
+ sizeof(struct ib_grh), 1, 0);
wc.wc_flags |= IB_WC_GRH;
- } else
+ } else {
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
- hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
- hfi1_put_ss(&qp->r_sge);
- if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+ }
+ hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ 1, 0);
+ rvt_put_ss(&qp->r_sge);
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -862,8 +885,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
}
}
wc.pkey_index = (unsigned)mgmt_pkey_idx;
- } else
+ } else {
wc.pkey_index = 0;
+ }
wc.slid = be16_to_cpu(hdr->lrh[3]);
sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -873,15 +897,15 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
- wc.dlid_path_bits = dlid >= HFI1_MULTICAST_LID_BASE ? 0 :
+ wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- (ohdr->bth[0] &
- cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
return;
drop:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
}
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
new file mode 100644
index 000000000000..0861e095df8d
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -0,0 +1,1044 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <asm/page.h>
+
+#include "user_exp_rcv.h"
+#include "trace.h"
+#include "mmu_rb.h"
+
+struct tid_group {
+ struct list_head list;
+ unsigned base;
+ u8 size;
+ u8 used;
+ u8 map;
+};
+
+struct tid_rb_node {
+ struct mmu_rb_node mmu;
+ unsigned long phys;
+ struct tid_group *grp;
+ u32 rcventry;
+ dma_addr_t dma_addr;
+ bool freed;
+ unsigned npages;
+ struct page *pages[0];
+};
+
+struct tid_pageset {
+ u16 idx;
+ u16 count;
+};
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define num_user_pages(vaddr, len) \
+ (1 + (((((unsigned long)(vaddr) + \
+ (unsigned long)(len) - 1) & PAGE_MASK) - \
+ ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
+ struct rb_root *);
+static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
+static int set_rcvarray_entry(struct file *, unsigned long, u32,
+ struct tid_group *, struct page **, unsigned);
+static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
+ struct tid_pageset *, unsigned, u16, struct page **,
+ u32 *, unsigned *, unsigned *);
+static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+
+static struct mmu_rb_ops tid_rb_ops = {
+ .insert = mmu_rb_insert,
+ .remove = mmu_rb_remove,
+ .invalidate = mmu_rb_invalidate
+};
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+ u32 pair = rcventry & ~0x1;
+
+ return EXP_TID_SET(IDX, pair >> 1) |
+ EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+static inline void exp_tid_group_init(struct exp_tid_set *set)
+{
+ INIT_LIST_HEAD(&set->list);
+ set->count = 0;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_del_init(&grp->list);
+ set->count--;
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_add_tail(&grp->list, &set->list);
+ set->count++;
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+ struct tid_group *grp =
+ list_first_entry(&set->list, struct tid_group, list);
+ list_del_init(&grp->list);
+ set->count--;
+ return grp;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+ struct exp_tid_set *s1,
+ struct exp_tid_set *s2)
+{
+ tid_group_remove(group, s1);
+ tid_group_add_tail(group, s2);
+}
+
+/*
+ * Initialize context and file private data needed for Expected
+ * receive caching. This needs to be done after the context has
+ * been configured with the eager/expected RcvEntry counts.
+ */
+int hfi1_user_exp_rcv_init(struct file *fp)
+{
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+ unsigned tidbase;
+ int i, ret = 0;
+
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->tid_rb_root = RB_ROOT;
+
+ if (!uctxt->subctxt_cnt || !fd->subctxt) {
+ exp_tid_group_init(&uctxt->tid_group_list);
+ exp_tid_group_init(&uctxt->tid_used_list);
+ exp_tid_group_init(&uctxt->tid_full_list);
+
+ tidbase = uctxt->expected_base;
+ for (i = 0; i < uctxt->expected_count /
+ dd->rcv_entries.group_size; i++) {
+ struct tid_group *grp;
+
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp) {
+ /*
+ * If we fail here, the groups already
+ * allocated will be freed by the close
+ * call.
+ */
+ ret = -ENOMEM;
+ goto done;
+ }
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &uctxt->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+ }
+
+ fd->entry_to_rb = kcalloc(uctxt->expected_count,
+ sizeof(struct rb_node *),
+ GFP_KERNEL);
+ if (!fd->entry_to_rb)
+ return -ENOMEM;
+
+ if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+ fd->invalid_tid_idx = 0;
+ fd->invalid_tids = kzalloc(uctxt->expected_count *
+ sizeof(u32), GFP_KERNEL);
+ if (!fd->invalid_tids) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * Register MMU notifier callbacks. If the registration
+ * fails, continue but turn off the TID caching for
+ * all user contexts.
+ */
+ ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
+ if (ret) {
+ dd_dev_info(dd,
+ "Failed MMU notifier registration %d\n",
+ ret);
+ HFI1_CAP_USET(TID_UNMAP);
+ ret = 0;
+ }
+ }
+
+ /*
+ * PSM does not have a good way to separate, count, and
+ * effectively enforce a limit on RcvArray entries used by
+ * subctxts (when context sharing is used) when TID caching
+ * is enabled. To help with that, we calculate a per-process
+ * RcvArray entry share and enforce that.
+ * If TID caching is not in use, PSM deals with usage on its
+ * own. In that case, we allow any subctxt to take all of the
+ * entries.
+ *
+ * Make sure that we set the tid counts only after successful
+ * init.
+ */
+ spin_lock(&fd->tid_lock);
+ if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+ u16 remainder;
+
+ fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
+ remainder = uctxt->expected_count % uctxt->subctxt_cnt;
+ if (remainder && fd->subctxt < remainder)
+ fd->tid_limit++;
+ } else {
+ fd->tid_limit = uctxt->expected_count;
+ }
+ spin_unlock(&fd->tid_lock);
+done:
+ return ret;
+}
+
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct tid_group *grp, *gptr;
+
+ /*
+ * The notifier would have been removed when the process'es mm
+ * was freed.
+ */
+ if (!HFI1_CAP_IS_USET(TID_UNMAP))
+ hfi1_mmu_rb_unregister(&fd->tid_rb_root);
+
+ kfree(fd->invalid_tids);
+
+ if (!uctxt->cnt) {
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list,
+ &fd->tid_rb_root);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list,
+ &fd->tid_rb_root);
+ list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+ list) {
+ list_del_init(&grp->list);
+ kfree(grp);
+ }
+ hfi1_clear_tids(uctxt);
+ }
+
+ kfree(fd->entry_to_rb);
+ return 0;
+}
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+ /*
+ * Doing the WC fill writes only makes sense if the device is
+ * present and the RcvArray has been mapped as WC memory.
+ */
+ if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+ writeq(0, dd->rcvarray_wc + (index * 8));
+}
+
+/*
+ * RcvArray entry allocation for Expected Receives is done by the
+ * following algorithm:
+ *
+ * The context keeps 3 lists of groups of RcvArray entries:
+ * 1. List of empty groups - tid_group_list
+ * This list is created during user context creation and
+ * contains elements which describe sets (of 8) of empty
+ * RcvArray entries.
+ * 2. List of partially used groups - tid_used_list
+ * This list contains sets of RcvArray entries which are
+ * not completely used up. Another mapping request could
+ * use some of all of the remaining entries.
+ * 3. List of full groups - tid_full_list
+ * This is the list where sets that are completely used
+ * up go.
+ *
+ * An attempt to optimize the usage of RcvArray entries is
+ * made by finding all sets of physically contiguous pages in a
+ * user's buffer.
+ * These physically contiguous sets are further split into
+ * sizes supported by the receive engine of the HFI. The
+ * resulting sets of pages are stored in struct tid_pageset,
+ * which describes the sets as:
+ * * .count - number of pages in this set
+ * * .idx - starting index into struct page ** array
+ * of this set
+ *
+ * From this point on, the algorithm deals with the page sets
+ * described above. The number of pagesets is divided by the
+ * RcvArray group size to produce the number of full groups
+ * needed.
+ *
+ * Groups from the 3 lists are manipulated using the following
+ * rules:
+ * 1. For each set of 8 pagesets, a complete group from
+ * tid_group_list is taken, programmed, and moved to
+ * the tid_full_list list.
+ * 2. For all remaining pagesets:
+ * 2.1 If the tid_used_list is empty and the tid_group_list
+ * is empty, stop processing pageset and return only
+ * what has been programmed up to this point.
+ * 2.2 If the tid_used_list is empty and the tid_group_list
+ * is not empty, move a group from tid_group_list to
+ * tid_used_list.
+ * 2.3 For each group is tid_used_group, program as much as
+ * can fit into the group. If the group becomes fully
+ * used, move it to tid_full_list.
+ */
+int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+ int ret = 0, need_group = 0, pinned;
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+ unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
+ tididx = 0, mapped, mapped_pages = 0;
+ unsigned long vaddr = tinfo->vaddr;
+ struct page **pages = NULL;
+ u32 *tidlist = NULL;
+ struct tid_pageset *pagesets = NULL;
+
+ /* Get the number of pages the user buffer spans */
+ npages = num_user_pages(vaddr, tinfo->length);
+ if (!npages)
+ return -EINVAL;
+
+ if (npages > uctxt->expected_count) {
+ dd_dev_err(dd, "Expected buffer too big\n");
+ return -EINVAL;
+ }
+
+ /* Verify that access is OK for the user buffer */
+ if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
+ npages * PAGE_SIZE)) {
+ dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
+ (void *)vaddr, npages);
+ return -EFAULT;
+ }
+
+ pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
+ GFP_KERNEL);
+ if (!pagesets)
+ return -ENOMEM;
+
+ /* Allocate the array of struct page pointers needed for pinning */
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ /*
+ * Pin all the pages of the user buffer. If we can't pin all the
+ * pages, accept the amount pinned so far and program only that.
+ * User space knows how to deal with partially programmed buffers.
+ */
+ if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages))
+ return -ENOMEM;
+ pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
+ if (pinned <= 0) {
+ ret = pinned;
+ goto bail;
+ }
+ fd->tid_n_pinned += npages;
+
+ /* Find sets of physically contiguous pages */
+ npagesets = find_phys_blocks(pages, pinned, pagesets);
+
+ /*
+ * We don't need to access this under a lock since tid_used is per
+ * process and the same process cannot be in hfi1_user_exp_rcv_clear()
+ * and hfi1_user_exp_rcv_setup() at the same time.
+ */
+ spin_lock(&fd->tid_lock);
+ if (fd->tid_used + npagesets > fd->tid_limit)
+ pageset_count = fd->tid_limit - fd->tid_used;
+ else
+ pageset_count = npagesets;
+ spin_unlock(&fd->tid_lock);
+
+ if (!pageset_count)
+ goto bail;
+
+ ngroups = pageset_count / dd->rcv_entries.group_size;
+ tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
+ if (!tidlist) {
+ ret = -ENOMEM;
+ goto nomem;
+ }
+
+ tididx = 0;
+
+ /*
+ * From this point on, we are going to be using shared (between master
+ * and subcontexts) context resources. We need to take the lock.
+ */
+ mutex_lock(&uctxt->exp_lock);
+ /*
+ * The first step is to program the RcvArray entries which are complete
+ * groups.
+ */
+ while (ngroups && uctxt->tid_group_list.count) {
+ struct tid_group *grp =
+ tid_group_pop(&uctxt->tid_group_list);
+
+ ret = program_rcvarray(fp, vaddr, grp, pagesets,
+ pageidx, dd->rcv_entries.group_size,
+ pages, tidlist, &tididx, &mapped);
+ /*
+ * If there was a failure to program the RcvArray
+ * entries for the entire group, reset the grp fields
+ * and add the grp back to the free group list.
+ */
+ if (ret <= 0) {
+ tid_group_add_tail(grp, &uctxt->tid_group_list);
+ hfi1_cdbg(TID,
+ "Failed to program RcvArray group %d", ret);
+ goto unlock;
+ }
+
+ tid_group_add_tail(grp, &uctxt->tid_full_list);
+ ngroups--;
+ pageidx += ret;
+ mapped_pages += mapped;
+ }
+
+ while (pageidx < pageset_count) {
+ struct tid_group *grp, *ptr;
+ /*
+ * If we don't have any partially used tid groups, check
+ * if we have empty groups. If so, take one from there and
+ * put in the partially used list.
+ */
+ if (!uctxt->tid_used_list.count || need_group) {
+ if (!uctxt->tid_group_list.count)
+ goto unlock;
+
+ grp = tid_group_pop(&uctxt->tid_group_list);
+ tid_group_add_tail(grp, &uctxt->tid_used_list);
+ need_group = 0;
+ }
+ /*
+ * There is an optimization opportunity here - instead of
+ * fitting as many page sets as we can, check for a group
+ * later on in the list that could fit all of them.
+ */
+ list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
+ list) {
+ unsigned use = min_t(unsigned, pageset_count - pageidx,
+ grp->size - grp->used);
+
+ ret = program_rcvarray(fp, vaddr, grp, pagesets,
+ pageidx, use, pages, tidlist,
+ &tididx, &mapped);
+ if (ret < 0) {
+ hfi1_cdbg(TID,
+ "Failed to program RcvArray entries %d",
+ ret);
+ ret = -EFAULT;
+ goto unlock;
+ } else if (ret > 0) {
+ if (grp->used == grp->size)
+ tid_group_move(grp,
+ &uctxt->tid_used_list,
+ &uctxt->tid_full_list);
+ pageidx += ret;
+ mapped_pages += mapped;
+ need_group = 0;
+ /* Check if we are done so we break out early */
+ if (pageidx >= pageset_count)
+ break;
+ } else if (WARN_ON(ret == 0)) {
+ /*
+ * If ret is 0, we did not program any entries
+ * into this group, which can only happen if
+ * we've screwed up the accounting somewhere.
+ * Warn and try to continue.
+ */
+ need_group = 1;
+ }
+ }
+ }
+unlock:
+ mutex_unlock(&uctxt->exp_lock);
+nomem:
+ hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
+ mapped_pages, ret);
+ if (tididx) {
+ spin_lock(&fd->tid_lock);
+ fd->tid_used += tididx;
+ spin_unlock(&fd->tid_lock);
+ tinfo->tidcnt = tididx;
+ tinfo->length = mapped_pages * PAGE_SIZE;
+
+ if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
+ tidlist, sizeof(tidlist[0]) * tididx)) {
+ /*
+ * On failure to copy to the user level, we need to undo
+ * everything done so far so we don't leak resources.
+ */
+ tinfo->tidlist = (unsigned long)&tidlist;
+ hfi1_user_exp_rcv_clear(fp, tinfo);
+ tinfo->tidlist = 0;
+ ret = -EFAULT;
+ goto bail;
+ }
+ }
+
+ /*
+ * If not everything was mapped (due to insufficient RcvArray entries,
+ * for example), unpin all unmapped pages so we can pin them nex time.
+ */
+ if (mapped_pages != pinned) {
+ hfi1_release_user_pages(current->mm, &pages[mapped_pages],
+ pinned - mapped_pages,
+ false);
+ fd->tid_n_pinned -= pinned - mapped_pages;
+ }
+bail:
+ kfree(pagesets);
+ kfree(pages);
+ kfree(tidlist);
+ return ret > 0 ? 0 : ret;
+}
+
+int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+ int ret = 0;
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ u32 *tidinfo;
+ unsigned tididx;
+
+ tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
+ if (!tidinfo)
+ return -ENOMEM;
+
+ if (copy_from_user(tidinfo, (void __user *)(unsigned long)
+ tinfo->tidlist, sizeof(tidinfo[0]) *
+ tinfo->tidcnt)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ mutex_lock(&uctxt->exp_lock);
+ for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
+ ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+ if (ret) {
+ hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
+ ret);
+ break;
+ }
+ }
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= tididx;
+ spin_unlock(&fd->tid_lock);
+ tinfo->tidcnt = tididx;
+ mutex_unlock(&uctxt->exp_lock);
+done:
+ kfree(tidinfo);
+ return ret;
+}
+
+int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ unsigned long *ev = uctxt->dd->events +
+ (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
+ u32 *array;
+ int ret = 0;
+
+ if (!fd->invalid_tids)
+ return -EINVAL;
+
+ /*
+ * copy_to_user() can sleep, which will leave the invalid_lock
+ * locked and cause the MMU notifier to be blocked on the lock
+ * for a long time.
+ * Copy the data to a local buffer so we can release the lock.
+ */
+ array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
+ if (!array)
+ return -EFAULT;
+
+ spin_lock(&fd->invalid_lock);
+ if (fd->invalid_tid_idx) {
+ memcpy(array, fd->invalid_tids, sizeof(*array) *
+ fd->invalid_tid_idx);
+ memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
+ fd->invalid_tid_idx);
+ tinfo->tidcnt = fd->invalid_tid_idx;
+ fd->invalid_tid_idx = 0;
+ /*
+ * Reset the user flag while still holding the lock.
+ * Otherwise, PSM can miss events.
+ */
+ clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+ } else {
+ tinfo->tidcnt = 0;
+ }
+ spin_unlock(&fd->invalid_lock);
+
+ if (tinfo->tidcnt) {
+ if (copy_to_user((void __user *)tinfo->tidlist,
+ array, sizeof(*array) * tinfo->tidcnt))
+ ret = -EFAULT;
+ }
+ kfree(array);
+
+ return ret;
+}
+
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+ struct tid_pageset *list)
+{
+ unsigned pagecount, pageidx, setcount = 0, i;
+ unsigned long pfn, this_pfn;
+
+ if (!npages)
+ return 0;
+
+ /*
+ * Look for sets of physically contiguous pages in the user buffer.
+ * This will allow us to optimize Expected RcvArray entry usage by
+ * using the bigger supported sizes.
+ */
+ pfn = page_to_pfn(pages[0]);
+ for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
+ this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;
+
+ /*
+ * If the pfn's are not sequential, pages are not physically
+ * contiguous.
+ */
+ if (this_pfn != ++pfn) {
+ /*
+ * At this point we have to loop over the set of
+ * physically contiguous pages and break them down it
+ * sizes supported by the HW.
+ * There are two main constraints:
+ * 1. The max buffer size is MAX_EXPECTED_BUFFER.
+ * If the total set size is bigger than that
+ * program only a MAX_EXPECTED_BUFFER chunk.
+ * 2. The buffer size has to be a power of two. If
+ * it is not, round down to the closes power of
+ * 2 and program that size.
+ */
+ while (pagecount) {
+ int maxpages = pagecount;
+ u32 bufsize = pagecount * PAGE_SIZE;
+
+ if (bufsize > MAX_EXPECTED_BUFFER)
+ maxpages =
+ MAX_EXPECTED_BUFFER >>
+ PAGE_SHIFT;
+ else if (!is_power_of_2(bufsize))
+ maxpages =
+ rounddown_pow_of_two(bufsize) >>
+ PAGE_SHIFT;
+
+ list[setcount].idx = pageidx;
+ list[setcount].count = maxpages;
+ pagecount -= maxpages;
+ pageidx += maxpages;
+ setcount++;
+ }
+ pageidx = i;
+ pagecount = 1;
+ pfn = this_pfn;
+ } else {
+ pagecount++;
+ }
+ }
+ return setcount;
+}
+
+/**
+ * program_rcvarray() - program an RcvArray group with receive buffers
+ * @fp: file pointer
+ * @vaddr: starting user virtual address
+ * @grp: RcvArray group
+ * @sets: array of struct tid_pageset holding information on physically
+ * contiguous chunks from the user buffer
+ * @start: starting index into sets array
+ * @count: number of struct tid_pageset's to program
+ * @pages: an array of struct page * for the user buffer
+ * @tidlist: the array of u32 elements when the information about the
+ * programmed RcvArray entries is to be encoded.
+ * @tididx: starting offset into tidlist
+ * @pmapped: (output parameter) number of pages programmed into the RcvArray
+ * entries.
+ *
+ * This function will program up to 'count' number of RcvArray entries from the
+ * group 'grp'. To make best use of write-combining writes, the function will
+ * perform writes to the unused RcvArray entries which will be ignored by the
+ * HW. Each RcvArray entry will be programmed with a physically contiguous
+ * buffer chunk from the user's virtual buffer.
+ *
+ * Return:
+ * -EINVAL if the requested count is larger than the size of the group,
+ * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
+ * number of RcvArray entries programmed.
+ */
+static int program_rcvarray(struct file *fp, unsigned long vaddr,
+ struct tid_group *grp,
+ struct tid_pageset *sets,
+ unsigned start, u16 count, struct page **pages,
+ u32 *tidlist, unsigned *tididx, unsigned *pmapped)
+{
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+ u16 idx;
+ u32 tidinfo = 0, rcventry, useidx = 0;
+ int mapped = 0;
+
+ /* Count should never be larger than the group size */
+ if (count > grp->size)
+ return -EINVAL;
+
+ /* Find the first unused entry in the group */
+ for (idx = 0; idx < grp->size; idx++) {
+ if (!(grp->map & (1 << idx))) {
+ useidx = idx;
+ break;
+ }
+ rcv_array_wc_fill(dd, grp->base + idx);
+ }
+
+ idx = 0;
+ while (idx < count) {
+ u16 npages, pageidx, setidx = start + idx;
+ int ret = 0;
+
+ /*
+ * If this entry in the group is used, move to the next one.
+ * If we go past the end of the group, exit the loop.
+ */
+ if (useidx >= grp->size) {
+ break;
+ } else if (grp->map & (1 << useidx)) {
+ rcv_array_wc_fill(dd, grp->base + useidx);
+ useidx++;
+ continue;
+ }
+
+ rcventry = grp->base + useidx;
+ npages = sets[setidx].count;
+ pageidx = sets[setidx].idx;
+
+ ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+ rcventry, grp, pages + pageidx,
+ npages);
+ if (ret)
+ return ret;
+ mapped += npages;
+
+ tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
+ EXP_TID_SET(LEN, npages);
+ tidlist[(*tididx)++] = tidinfo;
+ grp->used++;
+ grp->map |= 1 << useidx++;
+ idx++;
+ }
+
+ /* Fill the rest of the group with "blank" writes */
+ for (; useidx < grp->size; useidx++)
+ rcv_array_wc_fill(dd, grp->base + useidx);
+ *pmapped = mapped;
+ return idx;
+}
+
+static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+ u32 rcventry, struct tid_group *grp,
+ struct page **pages, unsigned npages)
+{
+ int ret;
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct tid_rb_node *node;
+ struct hfi1_devdata *dd = uctxt->dd;
+ struct rb_root *root = &fd->tid_rb_root;
+ dma_addr_t phys;
+
+ /*
+ * Allocate the node first so we can handle a potential
+ * failure before we've programmed anything.
+ */
+ node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
+ GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ phys = pci_map_single(dd->pcidev,
+ __va(page_to_phys(pages[0])),
+ npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, phys)) {
+ dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
+ phys);
+ kfree(node);
+ return -EFAULT;
+ }
+
+ node->mmu.addr = vaddr;
+ node->mmu.len = npages * PAGE_SIZE;
+ node->phys = page_to_phys(pages[0]);
+ node->npages = npages;
+ node->rcventry = rcventry;
+ node->dma_addr = phys;
+ node->grp = grp;
+ node->freed = false;
+ memcpy(node->pages, pages, sizeof(struct page *) * npages);
+
+ if (HFI1_CAP_IS_USET(TID_UNMAP))
+ ret = mmu_rb_insert(root, &node->mmu);
+ else
+ ret = hfi1_mmu_rb_insert(root, &node->mmu);
+
+ if (ret) {
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->mmu.addr, node->phys, ret);
+ pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ kfree(node);
+ return -EFAULT;
+ }
+ hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
+ trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
+ node->mmu.addr, node->phys, phys);
+ return 0;
+}
+
+static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+ struct tid_group **grp)
+{
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+ struct tid_rb_node *node;
+ u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+ u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
+
+ if (tididx >= uctxt->expected_count) {
+ dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+ tididx, uctxt->ctxt);
+ return -EINVAL;
+ }
+
+ if (tidctrl == 0x3)
+ return -EINVAL;
+
+ rcventry = tididx + (tidctrl - 1);
+
+ node = fd->entry_to_rb[rcventry];
+ if (!node || node->rcventry != (uctxt->expected_base + rcventry))
+ return -EBADF;
+ if (HFI1_CAP_IS_USET(TID_UNMAP))
+ mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+ else
+ hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
+
+ if (grp)
+ *grp = node->grp;
+ clear_tid_node(fd, fd->subctxt, node);
+ return 0;
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
+ struct tid_rb_node *node)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+
+ trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+ node->npages, node->mmu.addr, node->phys,
+ node->dma_addr);
+
+ hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
+ /*
+ * Make sure device has seen the write before we unpin the
+ * pages.
+ */
+ flush_wc();
+
+ pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
+ PCI_DMA_FROMDEVICE);
+ hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
+ fd->tid_n_pinned -= node->npages;
+
+ node->grp->used--;
+ node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
+
+ if (node->grp->used == node->grp->size - 1)
+ tid_group_move(node->grp, &uctxt->tid_full_list,
+ &uctxt->tid_used_list);
+ else if (!node->grp->used)
+ tid_group_move(node->grp, &uctxt->tid_used_list,
+ &uctxt->tid_group_list);
+ kfree(node);
+}
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+ struct exp_tid_set *set, struct rb_root *root)
+{
+ struct tid_group *grp, *ptr;
+ struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
+ tid_rb_root);
+ int i;
+
+ list_for_each_entry_safe(grp, ptr, &set->list, list) {
+ list_del_init(&grp->list);
+
+ for (i = 0; i < grp->size; i++) {
+ if (grp->map & (1 << i)) {
+ u16 rcventry = grp->base + i;
+ struct tid_rb_node *node;
+
+ node = fd->entry_to_rb[rcventry -
+ uctxt->expected_base];
+ if (!node || node->rcventry != rcventry)
+ continue;
+ if (HFI1_CAP_IS_USET(TID_UNMAP))
+ mmu_rb_remove(&fd->tid_rb_root,
+ &node->mmu, false);
+ else
+ hfi1_mmu_rb_remove(&fd->tid_rb_root,
+ &node->mmu);
+ clear_tid_node(fd, -1, node);
+ }
+ }
+ }
+}
+
+static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
+ struct tid_rb_node *node =
+ container_of(mnode, struct tid_rb_node, mmu);
+
+ if (node->freed)
+ return 0;
+
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ node->rcventry, node->npages, node->dma_addr);
+ node->freed = true;
+
+ spin_lock(&fdata->invalid_lock);
+ if (fdata->invalid_tid_idx < uctxt->expected_count) {
+ fdata->invalid_tids[fdata->invalid_tid_idx] =
+ rcventry2tidinfo(node->rcventry - uctxt->expected_base);
+ fdata->invalid_tids[fdata->invalid_tid_idx] |=
+ EXP_TID_SET(LEN, node->npages);
+ if (!fdata->invalid_tid_idx) {
+ unsigned long *ev;
+
+ /*
+ * hfi1_set_uevent_bits() sets a user event flag
+ * for all processes. Because calling into the
+ * driver to process TID cache invalidations is
+ * expensive and TID cache invalidations are
+ * handled on a per-process basis, we can
+ * optimize this to set the flag only for the
+ * process in question.
+ */
+ ev = uctxt->dd->events +
+ (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+ }
+ fdata->invalid_tid_idx++;
+ }
+ spin_unlock(&fdata->invalid_lock);
+ return 0;
+}
+
+static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
+{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
+ u32 base = fdata->uctxt->expected_base;
+
+ fdata->entry_to_rb[tnode->rcventry - base] = tnode;
+ return 0;
+}
+
+static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
+ bool notifier)
+{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
+ u32 base = fdata->uctxt->expected_base;
+
+ fdata->entry_to_rb[tnode->rcventry - base] = NULL;
+}
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/staging/rdma/hfi1/user_exp_rcv.h
index 4f4876e1d353..9bc8d9fba87e 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.h
@@ -1,14 +1,13 @@
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -20,8 +19,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -50,6 +47,8 @@
*
*/
+#include "hfi.h"
+
#define EXP_TID_TIDLEN_MASK 0x7FFULL
#define EXP_TID_TIDLEN_SHIFT 0
#define EXP_TID_TIDCTRL_MASK 0x3ULL
@@ -71,4 +70,10 @@
(tid) |= EXP_TID_SET(field, (value)); \
} while (0)
+int hfi1_user_exp_rcv_init(struct file *);
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
+int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+
#endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c
index 8ebfe9ee0d76..88e10b5f55f1 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/staging/rdma/hfi1/user_pages.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -51,32 +48,62 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/device.h>
+#include <linux/module.h>
#include "hfi.h"
-/**
- * hfi1_map_page - a safety wrapper around pci_map_page()
+static unsigned long cache_size = 256;
+module_param(cache_size, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
+
+/*
+ * Determine whether the caller can pin pages.
+ *
+ * This function should be used in the implementation of buffer caches.
+ * The cache implementation should call this function prior to attempting
+ * to pin buffer pages in order to determine whether they should do so.
+ * The function computes cache limits based on the configured ulimit and
+ * cache size. Use of this function is especially important for caches
+ * which are not limited in any other way (e.g. by HW resources) and, thus,
+ * could keeping caching buffers.
*
*/
-dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page,
- unsigned long offset, size_t size, int direction)
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
{
- return pci_map_page(hwdev, page, offset, size, direction);
-}
-
-int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
- struct page **pages)
-{
- unsigned long pinned, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
+ size = (cache_size * (1UL << 20)); /* convert to bytes */
+ unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
bool can_lock = capable(CAP_IPC_LOCK);
- int ret;
+
+ /*
+ * Calculate per-cache size. The calculation below uses only a quarter
+ * of the available per-context limit. This leaves space for other
+ * pinning. Should we worry about shared ctxts?
+ */
+ cache_limit = (ulimit / usr_ctxts) / 4;
+
+ /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
+ if (ulimit != (-1UL) && size > cache_limit)
+ size = cache_limit;
+
+ /* Convert to number of pages */
+ size = DIV_ROUND_UP(size, PAGE_SIZE);
down_read(&current->mm->mmap_sem);
pinned = current->mm->pinned_vm;
up_read(&current->mm->mmap_sem);
- if (pinned + npages > lock_limit && !can_lock)
- return -ENOMEM;
+ /* First, check the absolute limit against all pinned pages. */
+ if (pinned + npages >= ulimit && !can_lock)
+ return false;
+
+ return ((nlocked + npages) <= size) || can_lock;
+}
+
+int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
+ struct page **pages)
+{
+ int ret;
ret = get_user_pages_fast(vaddr, npages, writable, pages);
if (ret < 0)
@@ -89,7 +116,8 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
return ret;
}
-void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
+void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
+ size_t npages, bool dirty)
{
size_t i;
@@ -99,9 +127,9 @@ void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
put_page(p[i]);
}
- if (current->mm) { /* during close after signal, mm can be NULL */
- down_write(&current->mm->mmap_sem);
- current->mm->pinned_vm -= npages;
- up_write(&current->mm->mmap_sem);
+ if (mm) { /* during close after signal, mm can be NULL */
+ down_write(&mm->mmap_sem);
+ mm->pinned_vm -= npages;
+ up_write(&mm->mmap_sem);
}
}
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index 9d4f5d6aaf33..ab6b6a42000f 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -70,6 +67,7 @@
#include "verbs.h" /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
#include "trace.h"
+#include "mmu_rb.h"
static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
@@ -146,7 +144,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* Last packet in the request */
#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
-#define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
#define SDMA_REQ_IN_USE 0
#define SDMA_REQ_FOR_THREAD 1
@@ -170,16 +167,28 @@ static unsigned initial_pkt_count = 8;
#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
struct user_sdma_iovec {
+ struct list_head list;
struct iovec iov;
/* number of pages in this vector */
unsigned npages;
/* array of pinned pages for this vector */
struct page **pages;
- /* offset into the virtual address space of the vector at
- * which we last left off. */
+ /*
+ * offset into the virtual address space of the vector at
+ * which we last left off.
+ */
u64 offset;
};
+struct sdma_mmu_node {
+ struct mmu_rb_node rb;
+ struct list_head list;
+ struct hfi1_user_sdma_pkt_q *pq;
+ atomic_t refcount;
+ struct page **pages;
+ unsigned npages;
+};
+
struct user_sdma_request {
struct sdma_req_info info;
struct hfi1_user_sdma_pkt_q *pq;
@@ -213,15 +222,6 @@ struct user_sdma_request {
*/
u8 omfactor;
/*
- * pointer to the user's mm_struct. We are going to
- * get a reference to it so it doesn't get freed
- * since we might not be in process context when we
- * are processing the iov's.
- * Using this mm_struct, we can get vma based on the
- * iov's address (find_vma()).
- */
- struct mm_struct *user_mm;
- /*
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
*/
@@ -238,13 +238,12 @@ struct user_sdma_request {
u16 tididx;
u32 sent;
u64 seqnum;
+ u64 seqcomp;
+ u64 seqsubmitted;
struct list_head txps;
- spinlock_t txcmp_lock; /* protect txcmp list */
- struct list_head txcmp;
unsigned long flags;
/* status of the last txreq completed */
int status;
- struct work_struct worker;
};
/*
@@ -259,11 +258,6 @@ struct user_sdma_txreq {
struct sdma_txreq txreq;
struct list_head list;
struct user_sdma_request *req;
- struct {
- struct user_sdma_iovec *vec;
- u8 flags;
- } iovecs[3];
- int idx;
u16 flags;
unsigned busycount;
u64 seqnum;
@@ -279,21 +273,21 @@ struct user_sdma_txreq {
static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
-static void user_sdma_delayed_completion(struct work_struct *);
-static void user_sdma_free_request(struct user_sdma_request *);
+static void user_sdma_txreq_cb(struct sdma_txreq *, int);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
-static void unpin_vector_pages(struct user_sdma_request *,
- struct user_sdma_iovec *);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *,
struct user_sdma_txreq *, u32);
static int set_txreq_header_ahg(struct user_sdma_request *,
struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct user_sdma_request *,
- enum hfi1_sdma_comp_state, int);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+ struct hfi1_user_sdma_comp_q *,
+ u16, enum hfi1_sdma_comp_state, int);
static inline u32 set_pkt_bth_psn(__be32, u8, u32);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
@@ -303,6 +297,17 @@ static int defer_packet_queue(
struct sdma_txreq *,
unsigned seq);
static void activate_packet_queue(struct iowait *, int);
+static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
+static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+
+static struct mmu_rb_ops sdma_rb_ops = {
+ .filter = sdma_rb_filter,
+ .insert = sdma_rb_insert,
+ .remove = sdma_rb_remove,
+ .invalidate = sdma_rb_invalidate
+};
static int defer_packet_queue(
struct sdma_engine *sde,
@@ -380,7 +385,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
goto pq_nomem;
memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kmalloc(memsize, GFP_KERNEL);
+ pq->reqs = kzalloc(memsize, GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
@@ -392,9 +397,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
+ pq->sdma_rb_root = RB_ROOT;
+ INIT_LIST_HEAD(&pq->evict);
+ spin_lock_init(&pq->evict_lock);
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
- activate_packet_queue);
+ activate_packet_queue, NULL);
pq->reqidx = 0;
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
fd->subctxt);
@@ -421,6 +429,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
cq->nentries = hfi1_sdma_comp_ring_size;
fd->cq = cq;
+ ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+ if (ret) {
+ dd_dev_err(dd, "Failed to register with MMU %d", ret);
+ goto done;
+ }
+
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
list_add(&pq->list, &uctxt->sdma_queues);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
@@ -450,6 +464,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
uctxt->ctxt, fd->subctxt);
pq = fd->pq;
+ hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
if (pq) {
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
if (!list_empty(&pq->list))
@@ -476,7 +491,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
unsigned long dim, unsigned long *count)
{
- int ret = 0, i = 0, sent;
+ int ret = 0, i = 0;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
@@ -502,9 +517,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
dd->unit, uctxt->ctxt, fd->subctxt, ret);
return -EFAULT;
}
+
trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
(u16 *)&info);
- if (cq->comps[info.comp_idx].status == QUEUED) {
+ if (cq->comps[info.comp_idx].status == QUEUED ||
+ test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
dd->unit, uctxt->ctxt, fd->subctxt,
info.comp_idx);
@@ -531,10 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->cq = cq;
req->status = -1;
INIT_LIST_HEAD(&req->txps);
- INIT_LIST_HEAD(&req->txcmp);
- INIT_WORK(&req->worker, user_sdma_delayed_completion);
- spin_lock_init(&req->txcmp_lock);
memcpy(&req->info, &info, sizeof(info));
if (req_opcode(info.ctrl) == EXPECTED)
@@ -593,8 +607,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
- /* Calculate the initial TID offset based on the values of
- KDETH.OFFSET and KDETH.OM that are passed in. */
+ /*
+ * Calculate the initial TID offset based on the values of
+ * KDETH.OFFSET and KDETH.OM that are passed in.
+ */
req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
KDETH_OM_LARGE : KDETH_OM_SMALL);
@@ -603,8 +619,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/* Save all the IO vector structures */
while (i < req->data_iovs) {
+ INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
- req->iovs[i].offset = 0;
+ ret = pin_vector_pages(req, &req->iovs[i]);
+ if (ret) {
+ req->status = ret;
+ goto free_req;
+ }
req->data_len += req->iovs[i++].iov.iov_len;
}
SDMA_DBG(req, "total data length %u", req->data_len);
@@ -668,52 +689,59 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
}
- set_comp_state(req, QUEUED, 0);
+ set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+ atomic_inc(&pq->n_reqs);
/* Send the first N packets in the request to buy us some time */
- sent = user_sdma_send_pkts(req, pcount);
- if (unlikely(sent < 0)) {
- if (sent != -EBUSY) {
- req->status = sent;
- set_comp_state(req, ERROR, req->status);
- return sent;
- } else
- sent = 0;
+ ret = user_sdma_send_pkts(req, pcount);
+ if (unlikely(ret < 0 && ret != -EBUSY)) {
+ req->status = ret;
+ goto free_req;
}
- atomic_inc(&pq->n_reqs);
- xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
- if (sent < req->info.npkts) {
- /*
- * This is a somewhat blocking send implementation.
- * The driver will block the caller until all packets of the
- * request have been submitted to the SDMA engine. However, it
- * will not wait for send completions.
- */
- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
- ret = user_sdma_send_pkts(req, pcount);
- if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- return ret;
- }
- wait_event_interruptible_timeout(
- pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
- msecs_to_jiffies(
- SDMA_IOWAIT_TIMEOUT));
+ /*
+ * It is possible that the SDMA engine would have processed all the
+ * submitted packets by the time we get here. Therefore, only set
+ * packet queue state to ACTIVE if there are still uncompleted
+ * requests.
+ */
+ if (atomic_read(&pq->n_reqs))
+ xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+
+ /*
+ * This is a somewhat blocking send implementation.
+ * The driver will block the caller until all packets of the
+ * request have been submitted to the SDMA engine. However, it
+ * will not wait for send completions.
+ */
+ while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+ ret = user_sdma_send_pkts(req, pcount);
+ if (ret < 0) {
+ if (ret != -EBUSY) {
+ req->status = ret;
+ set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (ACCESS_ONCE(req->seqcomp) ==
+ req->seqsubmitted - 1)
+ goto free_req;
+ return ret;
}
+ wait_event_interruptible_timeout(
+ pq->busy.wait_dma,
+ (pq->state == SDMA_PKT_Q_ACTIVE),
+ msecs_to_jiffies(
+ SDMA_IOWAIT_TIMEOUT));
}
-
}
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req);
+ user_sdma_free_request(req, true);
+ pq_update(pq);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
return ret;
}
static inline u32 compute_data_length(struct user_sdma_request *req,
- struct user_sdma_txreq *tx)
+ struct user_sdma_txreq *tx)
{
/*
* Determine the proper size of the packet data.
@@ -731,8 +759,10 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
} else if (req_opcode(req->info.ctrl) == EXPECTED) {
u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
PAGE_SIZE;
- /* Get the data length based on the remaining space in the
- * TID pair. */
+ /*
+ * Get the data length based on the remaining space in the
+ * TID pair.
+ */
len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
/* If we've filled up the TID pair, move to the next one. */
if (unlikely(!len) && ++req->tididx < req->n_tids &&
@@ -742,12 +772,15 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
req->tidoffset = 0;
len = min_t(u32, tidlen, req->info.fragsize);
}
- /* Since the TID pairs map entire pages, make sure that we
+ /*
+ * Since the TID pairs map entire pages, make sure that we
* are not going to try to send more data that we have
- * remaining. */
+ * remaining.
+ */
len = min(len, req->data_len - req->sent);
- } else
+ } else {
len = min(req->data_len - req->sent, (u32)req->info.fragsize);
+ }
SDMA_DBG(req, "Data Length = %u", len);
return len;
}
@@ -810,9 +843,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->flags = 0;
tx->req = req;
tx->busycount = 0;
- tx->idx = -1;
INIT_LIST_HEAD(&tx->list);
- memset(tx->iovecs, 0, sizeof(tx->iovecs));
if (req->seqnum == req->info.npkts - 1)
tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
@@ -833,18 +864,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
WARN_ON(iovec->offset);
}
- /*
- * This request might include only a header and no user
- * data, so pin pages only if there is data and it the
- * pages have not been pinned already.
- */
- if (unlikely(!iovec->pages && iovec->iov.iov_len)) {
- ret = pin_vector_pages(req, iovec);
- if (ret)
- goto free_tx;
- }
-
- tx->iovecs[++tx->idx].vec = iovec;
datalen = compute_data_length(req, tx);
if (!datalen) {
SDMA_DBG(req,
@@ -934,16 +953,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
iovec->pages[pageidx],
offset, len);
if (ret) {
- int i;
-
SDMA_DBG(req, "SDMA txreq add page failed %d\n",
ret);
- /* Mark all assigned vectors as complete so they
- * are unpinned in the callback. */
- for (i = tx->idx; i >= 0; i--) {
- tx->iovecs[i].flags |=
- TXREQ_FLAGS_IOVEC_LAST_PKT;
- }
goto free_txreq;
}
iov_offset += len;
@@ -951,19 +962,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
data_sent += len;
if (unlikely(queued < datalen &&
pageidx == iovec->npages &&
- req->iov_idx < req->data_iovs - 1 &&
- tx->idx < ARRAY_SIZE(tx->iovecs))) {
+ req->iov_idx < req->data_iovs - 1)) {
iovec->offset += iov_offset;
- tx->iovecs[tx->idx].flags |=
- TXREQ_FLAGS_IOVEC_LAST_PKT;
iovec = &req->iovs[++req->iov_idx];
- if (!iovec->pages) {
- ret = pin_vector_pages(req, iovec);
- if (ret)
- goto free_txreq;
- }
iov_offset = 0;
- tx->iovecs[++tx->idx].vec = iovec;
}
}
/*
@@ -974,28 +976,21 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
if (req_opcode(req->info.ctrl) == EXPECTED)
req->tidoffset += datalen;
req->sent += data_sent;
- if (req->data_len) {
- tx->iovecs[tx->idx].vec->offset += iov_offset;
- /* If we've reached the end of the io vector, mark it
- * so the callback can unpin the pages and free it. */
- if (tx->iovecs[tx->idx].vec->offset ==
- tx->iovecs[tx->idx].vec->iov.iov_len)
- tx->iovecs[tx->idx].flags |=
- TXREQ_FLAGS_IOVEC_LAST_PKT;
- }
-
+ if (req->data_len)
+ iovec->offset += iov_offset;
+ list_add_tail(&tx->txreq.list, &req->txps);
/*
* It is important to increment this here as it is used to
* generate the BTH.PSN and, therefore, can't be bulk-updated
* outside of the loop.
*/
tx->seqnum = req->seqnum++;
- list_add_tail(&tx->txreq.list, &req->txps);
npkts++;
}
dosend:
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
- if (list_empty(&req->txps))
+ if (list_empty(&req->txps)) {
+ req->seqsubmitted = req->seqnum;
if (req->seqnum == req->info.npkts) {
set_bit(SDMA_REQ_SEND_DONE, &req->flags);
/*
@@ -1007,6 +1002,10 @@ dosend:
if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
sdma_ahg_free(req->sde, req->ahg_idx);
}
+ } else if (ret > 0) {
+ req->seqsubmitted += ret;
+ ret = 0;
+ }
return ret;
free_txreq:
@@ -1021,7 +1020,7 @@ free_tx:
*/
static inline int num_user_pages(const struct iovec *iov)
{
- const unsigned long addr = (unsigned long) iov->iov_base;
+ const unsigned long addr = (unsigned long)iov->iov_base;
const unsigned long len = iov->iov_len;
const unsigned long spage = addr & PAGE_MASK;
const unsigned long epage = (addr + len - 1) & PAGE_MASK;
@@ -1029,64 +1028,129 @@ static inline int num_user_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-static int pin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec) {
- int pinned, npages;
+/* Caller must hold pq->evict_lock */
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+ u32 cleared = 0;
+ struct sdma_mmu_node *node, *ptr;
- npages = num_user_pages(&iovec->iov);
- iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL);
- if (!iovec->pages) {
- SDMA_DBG(req, "Failed page array alloc");
- return -ENOMEM;
+ list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
+ /* Make sure that no one is still using the node. */
+ if (!atomic_read(&node->refcount)) {
+ /*
+ * Need to use the page count now as the remove callback
+ * will free the node.
+ */
+ cleared += node->npages;
+ spin_unlock(&pq->evict_lock);
+ hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+ spin_lock(&pq->evict_lock);
+ if (cleared >= npages)
+ break;
+ }
}
+ return cleared;
+}
- /*
- * Get a reference to the process's mm so we can use it when
- * unpinning the io vectors.
- */
- req->pq->user_mm = get_task_mm(current);
+static int pin_vector_pages(struct user_sdma_request *req,
+ struct user_sdma_iovec *iovec) {
+ int ret = 0, pinned, npages, cleared;
+ struct page **pages;
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ struct sdma_mmu_node *node = NULL;
+ struct mmu_rb_node *rb_node;
+
+ rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
+ (unsigned long)iovec->iov.iov_base,
+ iovec->iov.iov_len);
+ if (rb_node)
+ node = container_of(rb_node, struct sdma_mmu_node, rb);
+
+ if (!node) {
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
- pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
- npages, 0, iovec->pages);
+ node->rb.addr = (unsigned long)iovec->iov.iov_base;
+ node->rb.len = iovec->iov.iov_len;
+ node->pq = pq;
+ atomic_set(&node->refcount, 0);
+ INIT_LIST_HEAD(&node->list);
+ }
- if (pinned < 0)
- return pinned;
+ npages = num_user_pages(&iovec->iov);
+ if (node->npages < npages) {
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages) {
+ SDMA_DBG(req, "Failed page array alloc");
+ ret = -ENOMEM;
+ goto bail;
+ }
+ memcpy(pages, node->pages, node->npages * sizeof(*pages));
+
+ npages -= node->npages;
+retry:
+ if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+ spin_lock(&pq->evict_lock);
+ cleared = sdma_cache_evict(pq, npages);
+ spin_unlock(&pq->evict_lock);
+ if (cleared >= npages)
+ goto retry;
+ }
+ pinned = hfi1_acquire_user_pages(
+ ((unsigned long)iovec->iov.iov_base +
+ (node->npages * PAGE_SIZE)), npages, 0,
+ pages + node->npages);
+ if (pinned < 0) {
+ kfree(pages);
+ ret = pinned;
+ goto bail;
+ }
+ if (pinned != npages) {
+ unpin_vector_pages(current->mm, pages, pinned);
+ ret = -EFAULT;
+ goto bail;
+ }
+ kfree(node->pages);
+ node->pages = pages;
+ node->npages += pinned;
+ npages = node->npages;
+ spin_lock(&pq->evict_lock);
+ if (!rb_node)
+ list_add(&node->list, &pq->evict);
+ else
+ list_move(&node->list, &pq->evict);
+ pq->n_locked += pinned;
+ spin_unlock(&pq->evict_lock);
+ }
+ iovec->pages = node->pages;
+ iovec->npages = npages;
- iovec->npages = pinned;
- if (pinned != npages) {
- SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
- unpin_vector_pages(req, iovec);
- return -EFAULT;
+ if (!rb_node) {
+ ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+ if (ret) {
+ spin_lock(&pq->evict_lock);
+ list_del(&node->list);
+ pq->n_locked -= node->npages;
+ spin_unlock(&pq->evict_lock);
+ ret = 0;
+ goto bail;
+ }
+ } else {
+ atomic_inc(&node->refcount);
}
return 0;
+bail:
+ if (!rb_node)
+ kfree(node);
+ return ret;
}
-static void unpin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec)
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+ unsigned npages)
{
- /*
- * Unpinning is done through the workqueue so use the
- * process's mm if we have a reference to it.
- */
- if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
- use_mm(req->pq->user_mm);
-
- hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
-
- /*
- * Unuse the user's mm (see above) and release the
- * reference to it.
- */
- if (req->pq->user_mm) {
- if (current->flags & PF_KTHREAD)
- unuse_mm(req->pq->user_mm);
- mmput(req->pq->user_mm);
- }
-
- kfree(iovec->pages);
- iovec->pages = NULL;
- iovec->npages = 0;
- iovec->offset = 0;
+ hfi1_release_user_pages(mm, pages, npages, 0);
+ kfree(pages);
}
static int check_header_template(struct user_sdma_request *req,
@@ -1209,7 +1273,6 @@ static int set_txreq_header(struct user_sdma_request *req,
if (ret)
return ret;
goto done;
-
}
hdr->bth[2] = cpu_to_be32(
@@ -1219,7 +1282,7 @@ static int set_txreq_header(struct user_sdma_request *req,
/* Set ACK request on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
- hdr->bth[2] |= cpu_to_be32(1UL<<31);
+ hdr->bth[2] |= cpu_to_be32(1UL << 31);
/* Set the new offset */
hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
@@ -1233,8 +1296,10 @@ static int set_txreq_header(struct user_sdma_request *req,
if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
PAGE_SIZE)) {
req->tidoffset = 0;
- /* Since we don't copy all the TIDs, all at once,
- * we have to check again. */
+ /*
+ * Since we don't copy all the TIDs, all at once,
+ * we have to check again.
+ */
if (++req->tididx > req->n_tids - 1 ||
!req->tids[req->tididx]) {
return -EINVAL;
@@ -1315,8 +1380,10 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
PAGE_SIZE)) {
req->tidoffset = 0;
- /* Since we don't copy all the TIDs, all at once,
- * we have to check again. */
+ /*
+ * Since we don't copy all the TIDs, all at once,
+ * we have to check again.
+ */
if (++req->tididx > req->n_tids - 1 ||
!req->tids[req->tididx]) {
return -EINVAL;
@@ -1340,8 +1407,9 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
INTR) >> 16);
val &= cpu_to_le16(~(1U << 13));
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
- } else
+ } else {
AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+ }
}
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
@@ -1356,113 +1424,62 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
* tx request have been processed by the DMA engine. Called in
* interrupt context.
*/
-static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
- int drain)
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
struct user_sdma_request *req;
- bool defer;
- int i;
+ struct hfi1_user_sdma_pkt_q *pq;
+ struct hfi1_user_sdma_comp_q *cq;
+ u16 idx;
if (!tx->req)
return;
req = tx->req;
- /*
- * If this is the callback for the last packet of the request,
- * queue up the request for clean up.
- */
- defer = (tx->seqnum == req->info.npkts - 1);
-
- /*
- * If we have any io vectors associated with this txreq,
- * check whether they need to be 'freed'. We can't free them
- * here because the unpin function needs to be able to sleep.
- */
- for (i = tx->idx; i >= 0; i--) {
- if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
- defer = true;
- break;
- }
- }
+ pq = req->pq;
+ cq = req->cq;
- req->status = status;
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
- defer = true;
}
- /*
- * Defer the clean up of the iovectors and the request until later
- * so it can be done outside of interrupt context.
- */
- if (defer) {
- spin_lock(&req->txcmp_lock);
- list_add_tail(&tx->list, &req->txcmp);
- spin_unlock(&req->txcmp_lock);
- schedule_work(&req->worker);
+ req->seqcomp = tx->seqnum;
+ kmem_cache_free(pq->txreq_cache, tx);
+ tx = NULL;
+
+ idx = req->info.comp_idx;
+ if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+ if (req->seqcomp == req->info.npkts - 1) {
+ req->status = 0;
+ user_sdma_free_request(req, false);
+ pq_update(pq);
+ set_comp_state(pq, cq, idx, COMPLETE, 0);
+ }
} else {
- kmem_cache_free(req->pq->txreq_cache, tx);
+ if (status != SDMA_TXREQ_S_OK)
+ req->status = status;
+ if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+ (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+ test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+ user_sdma_free_request(req, false);
+ pq_update(pq);
+ set_comp_state(pq, cq, idx, ERROR, req->status);
+ }
}
}
-static void user_sdma_delayed_completion(struct work_struct *work)
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
- struct user_sdma_request *req =
- container_of(work, struct user_sdma_request, worker);
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- struct user_sdma_txreq *tx = NULL;
- unsigned long flags;
- u64 seqnum;
- int i;
-
- while (1) {
- spin_lock_irqsave(&req->txcmp_lock, flags);
- if (!list_empty(&req->txcmp)) {
- tx = list_first_entry(&req->txcmp,
- struct user_sdma_txreq, list);
- list_del(&tx->list);
- }
- spin_unlock_irqrestore(&req->txcmp_lock, flags);
- if (!tx)
- break;
-
- for (i = tx->idx; i >= 0; i--)
- if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
- unpin_vector_pages(req, tx->iovecs[i].vec);
-
- seqnum = tx->seqnum;
- kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- if (req->status != SDMA_TXREQ_S_OK) {
- if (seqnum == ACCESS_ONCE(req->seqnum) &&
- test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
- atomic_dec(&pq->n_reqs);
- set_comp_state(req, ERROR, req->status);
- user_sdma_free_request(req);
- break;
- }
- } else {
- if (seqnum == req->info.npkts - 1) {
- atomic_dec(&pq->n_reqs);
- set_comp_state(req, COMPLETE, 0);
- user_sdma_free_request(req);
- break;
- }
- }
- }
-
- if (!atomic_read(&pq->n_reqs)) {
+ if (atomic_dec_and_test(&pq->n_reqs)) {
xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
wake_up(&pq->wait);
}
}
-static void user_sdma_free_request(struct user_sdma_request *req)
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
if (!list_empty(&req->txps)) {
struct sdma_txreq *t, *p;
@@ -1476,25 +1493,87 @@ static void user_sdma_free_request(struct user_sdma_request *req)
}
}
if (req->data_iovs) {
+ struct sdma_mmu_node *node;
+ struct mmu_rb_node *mnode;
int i;
- for (i = 0; i < req->data_iovs; i++)
- if (req->iovs[i].npages && req->iovs[i].pages)
- unpin_vector_pages(req, &req->iovs[i]);
+ for (i = 0; i < req->data_iovs; i++) {
+ mnode = hfi1_mmu_rb_search(
+ &req->pq->sdma_rb_root,
+ (unsigned long)req->iovs[i].iov.iov_base,
+ req->iovs[i].iov.iov_len);
+ if (!mnode)
+ continue;
+
+ node = container_of(mnode, struct sdma_mmu_node, rb);
+ if (unpin)
+ hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+ &node->rb);
+ else
+ atomic_dec(&node->refcount);
+ }
}
kfree(req->tids);
clear_bit(SDMA_REQ_IN_USE, &req->flags);
}
-static inline void set_comp_state(struct user_sdma_request *req,
- enum hfi1_sdma_comp_state state,
- int ret)
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+ struct hfi1_user_sdma_comp_q *cq,
+ u16 idx, enum hfi1_sdma_comp_state state,
+ int ret)
{
- SDMA_DBG(req, "Setting completion status %u %d", state, ret);
- req->cq->comps[req->info.comp_idx].status = state;
+ hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
+ pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+ cq->comps[idx].status = state;
if (state == ERROR)
- req->cq->comps[req->info.comp_idx].errcode = -ret;
- trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
- req->pq->subctxt, req->info.comp_idx,
- state, ret);
+ cq->comps[idx].errcode = -ret;
+ trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
+ idx, state, ret);
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len)
+{
+ return (bool)(node->addr == addr);
+}
+
+static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+ atomic_inc(&node->refcount);
+ return 0;
+}
+
+static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
+ bool notifier)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+ spin_lock(&node->pq->evict_lock);
+ list_del(&node->list);
+ node->pq->n_locked -= node->npages;
+ spin_unlock(&node->pq->evict_lock);
+
+ unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+ node->npages);
+ /*
+ * If called by the MMU notifier, we have to adjust the pinned
+ * page count ourselves.
+ */
+ if (notifier)
+ current->mm->pinned_vm -= node->npages;
+ kfree(node);
+}
+
+static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+ if (!atomic_read(&node->refcount))
+ return 1;
+ return 0;
}
diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h
index 0afa28508a8a..b9240e351161 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/staging/rdma/hfi1/user_sdma.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -69,7 +66,11 @@ struct hfi1_user_sdma_pkt_q {
struct iowait busy;
unsigned state;
wait_queue_head_t wait;
- struct mm_struct *user_mm;
+ unsigned long unpinned;
+ struct rb_root sdma_rb_root;
+ u32 n_locked;
+ struct list_head evict;
+ spinlock_t evict_lock; /* protect evict and n_locked */
};
struct hfi1_user_sdma_comp_q {
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index 176168614b5a..89f2aad45c1b 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -63,9 +60,9 @@
#include "device.h"
#include "trace.h"
#include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
-unsigned int hfi1_lkey_table_size = 16;
+static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
@@ -124,45 +121,181 @@ unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
+unsigned short piothreshold = 256;
+module_param(piothreshold, ushort, S_IRUGO);
+MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
+
+#define COPY_CACHELESS 1
+#define COPY_ADAPTIVE 2
+static unsigned int sge_copy_mode;
+module_param(sge_copy_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(sge_copy_mode,
+ "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");
+
static void verbs_sdma_complete(
struct sdma_txreq *cookie,
- int status,
- int drained);
+ int status);
+
+static int pio_wait(struct rvt_qp *qp,
+ struct send_context *sc,
+ struct hfi1_pkt_state *ps,
+ u32 flag);
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
+static uint wss_threshold;
+module_param(wss_threshold, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
+static uint wss_clean_period = 256;
+module_param(wss_clean_period, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
+
+/* memory working set size */
+struct hfi1_wss {
+ unsigned long *entries;
+ atomic_t total_count;
+ atomic_t clean_counter;
+ atomic_t clean_entry;
+
+ int threshold;
+ int num_entries;
+ long pages_mask;
+};
+
+static struct hfi1_wss wss;
+
+int hfi1_wss_init(void)
+{
+ long llc_size;
+ long llc_bits;
+ long table_size;
+ long table_bits;
+
+ /* check for a valid percent range - default to 80 if none or invalid */
+ if (wss_threshold < 1 || wss_threshold > 100)
+ wss_threshold = 80;
+ /* reject a wildly large period */
+ if (wss_clean_period > 1000000)
+ wss_clean_period = 256;
+ /* reject a zero period */
+ if (wss_clean_period == 0)
+ wss_clean_period = 1;
+
+ /*
+ * Calculate the table size - the next power of 2 larger than the
+ * LLC size. LLC size is in KiB.
+ */
+ llc_size = wss_llc_size() * 1024;
+ table_size = roundup_pow_of_two(llc_size);
+
+ /* one bit per page in rounded up table */
+ llc_bits = llc_size / PAGE_SIZE;
+ table_bits = table_size / PAGE_SIZE;
+ wss.pages_mask = table_bits - 1;
+ wss.num_entries = table_bits / BITS_PER_LONG;
+
+ wss.threshold = (llc_bits * wss_threshold) / 100;
+ if (wss.threshold == 0)
+ wss.threshold = 1;
+
+ atomic_set(&wss.clean_counter, wss_clean_period);
+
+ wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
+ GFP_KERNEL);
+ if (!wss.entries) {
+ hfi1_wss_exit();
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void hfi1_wss_exit(void)
+{
+ /* coded to handle partially initialized and repeat callers */
+ kfree(wss.entries);
+ wss.entries = NULL;
+}
+
/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; hfi1_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
+ * Advance the clean counter. When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking. Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information. Since this is only a heuristic, this is
+ * OK. Any innaccuracies will clean themselves out as the counter
+ * advances. That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero. When zero
+ * is reached an entry is cleaned.
*/
-const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = 0,
- [IB_QPS_INIT] = HFI1_POST_RECV_OK,
- [IB_QPS_RTR] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK,
- [IB_QPS_RTS] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
- HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK |
- HFI1_PROCESS_NEXT_SEND_OK,
- [IB_QPS_SQD] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
- HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK,
- [IB_QPS_SQE] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
- HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
- [IB_QPS_ERR] = HFI1_POST_RECV_OK | HFI1_FLUSH_RECV |
- HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
-};
+static void wss_advance_clean_counter(void)
+{
+ int entry;
+ int weight;
+ unsigned long bits;
-struct hfi1_ucontext {
- struct ib_ucontext ibucontext;
-};
+ /* become the cleaner if we decrement the counter to zero */
+ if (atomic_dec_and_test(&wss.clean_counter)) {
+ /*
+ * Set, not add, the clean period. This avoids an issue
+ * where the counter could decrement below the clean period.
+ * Doing a set can result in lost decrements, slowing the
+ * clean advance. Since this a heuristic, this possible
+ * slowdown is OK.
+ *
+ * An alternative is to loop, advancing the counter by a
+ * clean period until the result is > 0. However, this could
+ * lead to several threads keeping another in the clean loop.
+ * This could be mitigated by limiting the number of times
+ * we stay in the loop.
+ */
+ atomic_set(&wss.clean_counter, wss_clean_period);
-static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext
- *ibucontext)
+ /*
+ * Uniquely grab the entry to clean and move to next.
+ * The current entry is always the lower bits of
+ * wss.clean_entry. The table size, wss.num_entries,
+ * is always a power-of-2.
+ */
+ entry = (atomic_inc_return(&wss.clean_entry) - 1)
+ & (wss.num_entries - 1);
+
+ /* clear the entry and count the bits */
+ bits = xchg(&wss.entries[entry], 0);
+ weight = hweight64((u64)bits);
+ /* only adjust the contended total count if needed */
+ if (weight)
+ atomic_sub(weight, &wss.total_count);
+ }
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(void *address)
{
- return container_of(ibucontext, struct hfi1_ucontext, ibucontext);
+ u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
+ u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+ u32 nr = page & (BITS_PER_LONG - 1);
+
+ if (!test_and_set_bit(nr, &wss.entries[entry]))
+ atomic_inc(&wss.total_count);
+
+ wss_advance_clean_counter();
}
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp);
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline int wss_exceeds_threshold(void)
+{
+ return atomic_read(&wss.total_count) >= wss.threshold;
+}
/*
* Translate ib_wr_opcode into ib_wc_opcode.
@@ -274,14 +407,47 @@ __be64 ib_hfi1_sys_image_guid;
* @ss: the SGE state
* @data: the data to copy
* @length: the length of the data
+ * @copy_last: do a separate copy of the last 8 bytes
*/
void hfi1_copy_sge(
- struct hfi1_sge_state *ss,
+ struct rvt_sge_state *ss,
void *data, u32 length,
- int release)
+ int release,
+ int copy_last)
{
- struct hfi1_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
+ int in_last = 0;
+ int i;
+ int cacheless_copy = 0;
+
+ if (sge_copy_mode == COPY_CACHELESS) {
+ cacheless_copy = length >= PAGE_SIZE;
+ } else if (sge_copy_mode == COPY_ADAPTIVE) {
+ if (length >= PAGE_SIZE) {
+ /*
+ * NOTE: this *assumes*:
+ * o The first vaddr is the dest.
+ * o If multiple pages, then vaddr is sequential.
+ */
+ wss_insert(sge->vaddr);
+ if (length >= (2 * PAGE_SIZE))
+ wss_insert(sge->vaddr + PAGE_SIZE);
+
+ cacheless_copy = wss_exceeds_threshold();
+ } else {
+ wss_advance_clean_counter();
+ }
+ }
+ if (copy_last) {
+ if (length > 8) {
+ length -= 8;
+ } else {
+ copy_last = 0;
+ in_last = 1;
+ }
+ }
+again:
while (length) {
u32 len = sge->length;
@@ -290,17 +456,25 @@ void hfi1_copy_sge(
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- memcpy(sge->vaddr, data, len);
+ if (unlikely(in_last)) {
+ /* enforce byte transfer ordering */
+ for (i = 0; i < len; i++)
+ ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+ } else if (cacheless_copy) {
+ cacheless_memcpy(sge->vaddr, data, len);
+ } else {
+ memcpy(sge->vaddr, data, len);
+ }
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= HFI1_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -313,6 +487,13 @@ void hfi1_copy_sge(
data += len;
length -= len;
}
+
+ if (copy_last) {
+ copy_last = 0;
+ in_last = 1;
+ length = 8;
+ goto again;
+ }
}
/**
@@ -320,9 +501,9 @@ void hfi1_copy_sge(
* @ss: the SGE state
* @length: the number of bytes to skip
*/
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
{
- struct hfi1_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
while (length) {
u32 len = sge->length;
@@ -337,11 +518,11 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- hfi1_put_mr(sge->mr);
+ rvt_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= HFI1_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
@@ -355,231 +536,6 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
}
}
-/**
- * post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
-{
- struct hfi1_swqe *wqe;
- u32 next;
- int i;
- int j;
- int acc;
- struct hfi1_lkey_table *rkt;
- struct hfi1_pd *pd;
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- struct hfi1_pportdata *ppd;
- struct hfi1_ibport *ibp;
-
- /* IB spec says that num_sge == 0 is OK. */
- if (unlikely(wr->num_sge > qp->s_max_sge))
- return -EINVAL;
-
- ppd = &dd->pport[qp->port_num - 1];
- ibp = &ppd->ibport_data;
-
- /*
- * Don't allow RDMA reads or atomic operations on UC or
- * undefined operations.
- * Make sure buffer is large enough to hold the result for atomics.
- */
- if (qp->ibqp.qp_type == IB_QPT_UC) {
- if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
- return -EINVAL;
- } else if (qp->ibqp.qp_type != IB_QPT_RC) {
- /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
- if (wr->opcode != IB_WR_SEND &&
- wr->opcode != IB_WR_SEND_WITH_IMM)
- return -EINVAL;
- /* Check UD destination address PD */
- if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
- return -EINVAL;
- } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
- return -EINVAL;
- else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
- (wr->num_sge == 0 ||
- wr->sg_list[0].length < sizeof(u64) ||
- wr->sg_list[0].addr & (sizeof(u64) - 1)))
- return -EINVAL;
- else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
- return -EINVAL;
-
- next = qp->s_head + 1;
- if (next >= qp->s_size)
- next = 0;
- if (next == qp->s_last)
- return -ENOMEM;
-
- rkt = &to_idev(qp->ibqp.device)->lk_table;
- pd = to_ipd(qp->ibqp.pd);
- wqe = get_swqe_ptr(qp, qp->s_head);
-
-
- if (qp->ibqp.qp_type != IB_QPT_UC &&
- qp->ibqp.qp_type != IB_QPT_RC)
- memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
- else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE ||
- wr->opcode == IB_WR_RDMA_READ)
- memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
- else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
- else
- memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
- wqe->length = 0;
- j = 0;
- if (wr->num_sge) {
- acc = wr->opcode >= IB_WR_RDMA_READ ?
- IB_ACCESS_LOCAL_WRITE : 0;
- for (i = 0; i < wr->num_sge; i++) {
- u32 length = wr->sg_list[i].length;
- int ok;
-
- if (length == 0)
- continue;
- ok = hfi1_lkey_ok(rkt, pd, &wqe->sg_list[j],
- &wr->sg_list[i], acc);
- if (!ok)
- goto bail_inval_free;
- wqe->length += length;
- j++;
- }
- wqe->wr.num_sge = j;
- }
- if (qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_RC) {
- if (wqe->length > 0x80000000U)
- goto bail_inval_free;
- } else {
- struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
-
- atomic_inc(&ah->refcount);
- }
- wqe->ssn = qp->s_ssn++;
- qp->s_head = next;
-
- return 0;
-
-bail_inval_free:
- /* release mr holds */
- while (j) {
- struct hfi1_sge *sge = &wqe->sg_list[--j];
-
- hfi1_put_mr(sge->mr);
- }
- return -EINVAL;
-}
-
-/**
- * post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
-{
- struct hfi1_qp *qp = to_iqp(ibqp);
- int err = 0;
- int call_send;
- unsigned long flags;
- unsigned nreq = 0;
-
- spin_lock_irqsave(&qp->s_lock, flags);
-
- /* Check that state is OK to post send. */
- if (unlikely(!(ib_hfi1_state_ops[qp->state] & HFI1_POST_SEND_OK))) {
- spin_unlock_irqrestore(&qp->s_lock, flags);
- return -EINVAL;
- }
-
- /* sq empty and not list -> call send */
- call_send = qp->s_head == qp->s_last && !wr->next;
-
- for (; wr; wr = wr->next) {
- err = post_one_send(qp, wr);
- if (unlikely(err)) {
- *bad_wr = wr;
- goto bail;
- }
- nreq++;
- }
-bail:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- if (nreq && !call_send)
- _hfi1_schedule_send(qp);
- if (nreq && call_send)
- hfi1_do_send(&qp->s_iowait.iowork);
- return err;
-}
-
-/**
- * post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
-{
- struct hfi1_qp *qp = to_iqp(ibqp);
- struct hfi1_rwq *wq = qp->r_rq.wq;
- unsigned long flags;
- int ret;
-
- /* Check that state is OK to post receive. */
- if (!(ib_hfi1_state_ops[qp->state] & HFI1_POST_RECV_OK) || !wq) {
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
-
- for (; wr; wr = wr->next) {
- struct hfi1_rwqe *wqe;
- u32 next;
- int i;
-
- if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
-
- spin_lock_irqsave(&qp->r_rq.lock, flags);
- next = wq->head + 1;
- if (next >= qp->r_rq.size)
- next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
- *bad_wr = wr;
- ret = -ENOMEM;
- goto bail;
- }
-
- wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
- wqe->wr_id = wr->wr_id;
- wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
- /* Make sure queue entry is written before the head index. */
- smp_wmb();
- wq->head = next;
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
- }
- ret = 0;
-
-bail:
- return ret;
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@@ -587,18 +543,17 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp;
- if (!(ib_hfi1_state_ops[packet->qp->state] & HFI1_PROCESS_RECV_OK))
+ if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
goto dropit;
if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
(opcode == IB_OPCODE_CNP))
return 1;
dropit:
ibp = &packet->rcd->ppd->ibport_data;
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
return 0;
}
-
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -614,6 +569,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
unsigned long flags;
u32 qp_num;
int lnh;
@@ -622,9 +578,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
/* Check for GRH */
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_BTH)
+ if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
- else if (lnh == HFI1_LRH_GRH) {
+ } else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
packet->ohdr = &hdr->u.l.oth;
@@ -634,8 +590,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
packet->rcv_flags |= HFI1_HAS_GRH;
- } else
+ } else {
goto drop;
+ }
trace_input_ibhdr(rcd->dd, hdr);
@@ -643,17 +600,17 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
- qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK;
+ qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
lid = be16_to_cpu(hdr->lrh[1]);
- if (unlikely((lid >= HFI1_MULTICAST_LID_BASE) &&
- (lid != HFI1_PERMISSIVE_LID))) {
- struct hfi1_mcast *mcast;
- struct hfi1_mcast_qp *p;
+ if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+ struct rvt_mcast *mcast;
+ struct rvt_mcast_qp *p;
if (lnh != HFI1_LRH_GRH)
goto drop;
- mcast = hfi1_mcast_find(ibp, &hdr->u.l.grh.dgid);
- if (mcast == NULL)
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
@@ -663,14 +620,14 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
/*
- * Notify hfi1_multicast_detach() if it is waiting for us
+ * Notify rvt_multicast_detach() if it is waiting for us
* to finish.
*/
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
rcu_read_lock();
- packet->qp = hfi1_lookup_qpn(ibp, qp_num);
+ packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp) {
rcu_read_unlock();
goto drop;
@@ -684,7 +641,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
return;
drop:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
}
/*
@@ -695,15 +652,17 @@ static void mem_timer(unsigned long data)
{
struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
struct list_head *list = &dev->memwait;
- struct hfi1_qp *qp = NULL;
+ struct rvt_qp *qp = NULL;
struct iowait *wait;
unsigned long flags;
+ struct hfi1_qp_priv *priv;
write_seqlock_irqsave(&dev->iowait_lock, flags);
if (!list_empty(list)) {
wait = list_first_entry(list, struct iowait, list);
- qp = container_of(wait, struct hfi1_qp, s_iowait);
- list_del_init(&qp->s_iowait.list);
+ qp = iowait_to_qp(wait);
+ priv = qp->priv;
+ list_del_init(&priv->s_iowait.list);
/* refcount held until actual wake up */
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -711,12 +670,12 @@ static void mem_timer(unsigned long data)
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
if (qp)
- hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM);
+ hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}
-void update_sge(struct hfi1_sge_state *ss, u32 length)
+void update_sge(struct rvt_sge_state *ss, u32 length)
{
- struct hfi1_sge *sge = &ss->sge;
+ struct rvt_sge *sge = &ss->sge;
sge->vaddr += length;
sge->length -= length;
@@ -725,7 +684,7 @@ void update_sge(struct hfi1_sge_state *ss, u32 length)
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= HFI1_SEGSZ) {
+ if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
return;
sge->n = 0;
@@ -735,143 +694,55 @@ void update_sge(struct hfi1_sge_state *ss, u32 length)
}
}
-static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
- struct hfi1_qp *qp)
-{
- struct verbs_txreq *tx;
- unsigned long flags;
-
- tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
- if (!tx) {
- spin_lock_irqsave(&qp->s_lock, flags);
- write_seqlock(&dev->iowait_lock);
- if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK &&
- list_empty(&qp->s_iowait.list)) {
- dev->n_txwait++;
- qp->s_flags |= HFI1_S_WAIT_TX;
- list_add_tail(&qp->s_iowait.list, &dev->txwait);
- trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX);
- atomic_inc(&qp->refcount);
- }
- qp->s_flags &= ~HFI1_S_BUSY;
- write_sequnlock(&dev->iowait_lock);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- tx = ERR_PTR(-EBUSY);
- }
- return tx;
-}
-
-static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
- struct hfi1_qp *qp)
-{
- struct verbs_txreq *tx;
-
- tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
- if (!tx) {
- /* call slow path to get the lock */
- tx = __get_txreq(dev, qp);
- if (IS_ERR(tx))
- return tx;
- }
- tx->qp = qp;
- return tx;
-}
-
-void hfi1_put_txreq(struct verbs_txreq *tx)
-{
- struct hfi1_ibdev *dev;
- struct hfi1_qp *qp;
- unsigned long flags;
- unsigned int seq;
-
- qp = tx->qp;
- dev = to_idev(qp->ibqp.device);
-
- if (tx->mr) {
- hfi1_put_mr(tx->mr);
- tx->mr = NULL;
- }
- sdma_txclean(dd_from_dev(dev), &tx->txreq);
-
- /* Free verbs_txreq and return to slab cache */
- kmem_cache_free(dev->verbs_txreq_cache, tx);
-
- do {
- seq = read_seqbegin(&dev->iowait_lock);
- if (!list_empty(&dev->txwait)) {
- struct iowait *wait;
-
- write_seqlock_irqsave(&dev->iowait_lock, flags);
- /* Wake up first QP wanting a free struct */
- wait = list_first_entry(&dev->txwait, struct iowait,
- list);
- qp = container_of(wait, struct hfi1_qp, s_iowait);
- list_del_init(&qp->s_iowait.list);
- /* refcount held until actual wake up */
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
- hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX);
- break;
- }
- } while (read_seqretry(&dev->iowait_lock, seq));
-}
-
/*
* This is called with progress side lock held.
*/
/* New API */
static void verbs_sdma_complete(
struct sdma_txreq *cookie,
- int status,
- int drained)
+ int status)
{
struct verbs_txreq *tx =
container_of(cookie, struct verbs_txreq, txreq);
- struct hfi1_qp *qp = tx->qp;
+ struct rvt_qp *qp = tx->qp;
spin_lock(&qp->s_lock);
- if (tx->wqe)
+ if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
- else if (qp->ibqp.qp_type == IB_QPT_RC) {
+ } else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct hfi1_ib_header *hdr;
hdr = &tx->phdr.hdr;
hfi1_rc_send_complete(qp, hdr);
}
- if (drained) {
- /*
- * This happens when the send engine notes
- * a QP in the error state and cannot
- * do the flush work until that QP's
- * sdma work has finished.
- */
- if (qp->s_flags & HFI1_S_WAIT_DMA) {
- qp->s_flags &= ~HFI1_S_WAIT_DMA;
- hfi1_schedule_send(qp);
- }
- }
spin_unlock(&qp->s_lock);
hfi1_put_txreq(tx);
}
-static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
+static int wait_kmem(struct hfi1_ibdev *dev,
+ struct rvt_qp *qp,
+ struct hfi1_pkt_state *ps)
{
+ struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
- if (list_empty(&qp->s_iowait.list)) {
+ list_add_tail(&ps->s_txreq->txreq.list,
+ &priv->s_iowait.tx_head);
+ if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
- qp->s_flags |= HFI1_S_WAIT_KMEM;
- list_add_tail(&qp->s_iowait.list, &dev->memwait);
- trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM);
+ qp->s_flags |= RVT_S_WAIT_KMEM;
+ list_add_tail(&priv->s_iowait.list, &dev->memwait);
+ trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
atomic_inc(&qp->refcount);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~HFI1_S_BUSY;
+ qp->s_flags &= ~RVT_S_BUSY;
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -884,14 +755,14 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
*
* Add failures will revert the sge cursor
*/
-static int build_verbs_ulp_payload(
+static noinline int build_verbs_ulp_payload(
struct sdma_engine *sde,
- struct hfi1_sge_state *ss,
+ struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx)
{
- struct hfi1_sge *sg_list = ss->sg_list;
- struct hfi1_sge sge = ss->sge;
+ struct rvt_sge *sg_list = ss->sg_list;
+ struct rvt_sge sge = ss->sge;
u8 num_sge = ss->num_sge;
u32 len;
int ret = 0;
@@ -928,23 +799,21 @@ bail_txadd:
* NOTE: DMA mapping is held in the tx until completed in the ring or
* the tx desc is freed without having been submitted to the ring
*
- * This routine insures the following all the helper routine
- * calls succeed.
+ * This routine ensures all the helper routine calls succeed.
*/
/* New API */
static int build_verbs_tx_desc(
struct sdma_engine *sde,
- struct hfi1_sge_state *ss,
+ struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
struct ahg_ib_header *ahdr,
u64 pbc)
{
int ret = 0;
- struct hfi1_pio_header *phdr;
+ struct hfi1_pio_header *phdr = &tx->phdr;
u16 hdrbytes = tx->hdr_dwords << 2;
- phdr = &tx->phdr;
if (!ahdr->ahgcount) {
ret = sdma_txinit_ahg(
&tx->txreq,
@@ -958,29 +827,14 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
phdr->pbc = cpu_to_le64(pbc);
- memcpy(&phdr->hdr, &ahdr->ibh, hdrbytes - sizeof(phdr->pbc));
- /* add the header */
ret = sdma_txadd_kvaddr(
sde->dd,
&tx->txreq,
- &tx->phdr,
- tx->hdr_dwords << 2);
+ phdr,
+ hdrbytes);
if (ret)
goto bail_txadd;
} else {
- struct hfi1_other_headers *sohdr = &ahdr->ibh.u.oth;
- struct hfi1_other_headers *dohdr = &phdr->hdr.u.oth;
-
- /* needed in rc_send_complete() */
- phdr->hdr.lrh[0] = ahdr->ibh.lrh[0];
- if ((be16_to_cpu(phdr->hdr.lrh[0]) & 3) == HFI1_LRH_GRH) {
- sohdr = &ahdr->ibh.u.l.oth;
- dohdr = &phdr->hdr.u.l.oth;
- }
- /* opcode */
- dohdr->bth[0] = sohdr->bth[0];
- /* PSN/ACK */
- dohdr->bth[2] = sohdr->bth[2];
ret = sdma_txinit_ahg(
&tx->txreq,
ahdr->tx_flags,
@@ -1001,80 +855,75 @@ bail_txadd:
return ret;
}
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
- struct ahg_ib_header *ahdr = qp->s_hdr;
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ahg_ib_header *ahdr = priv->s_hdr;
u32 hdrwords = qp->s_hdrwords;
- struct hfi1_sge_state *ss = qp->s_cur_sge;
+ struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- struct sdma_txreq *stx;
u64 pbc_flags = 0;
- u8 sc5 = qp->s_sc;
+ u8 sc5 = priv->s_sc;
+
int ret;
- if (!list_empty(&qp->s_iowait.tx_head)) {
- stx = list_first_entry(
- &qp->s_iowait.tx_head,
- struct sdma_txreq,
- list);
- list_del_init(&stx->list);
- tx = container_of(stx, struct verbs_txreq, txreq);
- ret = sdma_send_txreq(tx->sde, &qp->s_iowait, stx);
- if (unlikely(ret == -ECOMM))
+ tx = ps->s_txreq;
+ if (!sdma_txreq_built(&tx->txreq)) {
+ if (likely(pbc == 0)) {
+ u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ /* No vl15 here */
+ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
+ pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+
+ pbc = create_pbc(ppd,
+ pbc_flags,
+ qp->srate_mbps,
+ vl,
+ plen);
+ }
+ tx->wqe = qp->s_wqe;
+ ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
+ if (unlikely(ret))
+ goto bail_build;
+ }
+ ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+ if (unlikely(ret < 0)) {
+ if (ret == -ECOMM)
goto bail_ecomm;
return ret;
}
-
- tx = get_txreq(dev, qp);
- if (IS_ERR(tx))
- goto bail_tx;
-
- tx->sde = qp->s_sde;
-
- if (likely(pbc == 0)) {
- u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
- /* No vl15 here */
- /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
-
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
- }
- tx->wqe = qp->s_wqe;
- tx->mr = qp->s_rdma_mr;
- if (qp->s_rdma_mr)
- qp->s_rdma_mr = NULL;
- tx->hdr_dwords = hdrwords + 2;
- ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
- if (unlikely(ret))
- goto bail_build;
- trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
- ret = sdma_send_txreq(tx->sde, &qp->s_iowait, &tx->txreq);
- if (unlikely(ret == -ECOMM))
- goto bail_ecomm;
+ trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+ &ps->s_txreq->phdr.hdr);
return ret;
bail_ecomm:
/* The current one got "sent" */
return 0;
bail_build:
- /* kmalloc or mapping fail */
- hfi1_put_txreq(tx);
- return wait_kmem(dev, qp);
-bail_tx:
- return PTR_ERR(tx);
+ ret = wait_kmem(dev, qp, ps);
+ if (!ret) {
+ /* free txreq - bad state */
+ hfi1_put_txreq(ps->s_txreq);
+ ps->s_txreq = NULL;
+ }
+ return ret;
}
/*
* If we are now in the error state, return zero to flush the
* send work request.
*/
-static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
+static int pio_wait(struct rvt_qp *qp,
+ struct send_context *sc,
+ struct hfi1_pkt_state *ps,
+ u32 flag)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_devdata *dd = sc->dd;
struct hfi1_ibdev *dev = &dd->verbs_dev;
unsigned long flags;
@@ -1087,74 +936,89 @@ static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
* enabling the PIO avail interrupt.
*/
spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
- if (list_empty(&qp->s_iowait.list)) {
+ list_add_tail(&ps->s_txreq->txreq.list,
+ &priv->s_iowait.tx_head);
+ if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty;
+ dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
+ dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
dev->n_piowait++;
- qp->s_flags |= HFI1_S_WAIT_PIO;
+ qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
- list_add_tail(&qp->s_iowait.list, &sc->piowait);
- trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO);
+ list_add_tail(&priv->s_iowait.list, &sc->piowait);
+ trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
atomic_inc(&qp->refcount);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~HFI1_S_BUSY;
+ qp->s_flags &= ~RVT_S_BUSY;
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5)
+static void verbs_pio_complete(void *arg, int code)
{
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1);
- u8 vl;
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
+ struct hfi1_qp_priv *priv = qp->priv;
- vl = sc_to_vlt(dd, sc5);
- if (vl >= ppd->vls_supported && vl != 15)
- return NULL;
- return dd->vld[vl].sc;
+ if (iowait_pio_dec(&priv->s_iowait))
+ iowait_drain_wakeup(&priv->s_iowait);
}
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
- struct ahg_ib_header *ahdr = qp->s_hdr;
+ struct hfi1_qp_priv *priv = qp->priv;
u32 hdrwords = qp->s_hdrwords;
- struct hfi1_sge_state *ss = qp->s_cur_sge;
+ struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
u32 dwords = (len + 3) >> 2;
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
- u32 *hdr = (u32 *)&ahdr->ibh;
+ u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
u64 pbc_flags = 0;
- u32 sc5;
+ u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
struct pio_buf *pbuf;
int wc_status = IB_WC_SUCCESS;
+ int ret = 0;
+ pio_release_cb cb = NULL;
+
+ /* only RC/UC use complete */
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ cb = verbs_pio_complete;
+ break;
+ default:
+ break;
+ }
/* vl15 special case taken care of in ud.c */
- sc5 = qp->s_sc;
- sc = qp_to_send_context(qp, sc5);
+ sc5 = priv->s_sc;
+ sc = ps->s_txreq->psc;
- if (!sc)
- return -EINVAL;
if (likely(pbc == 0)) {
- u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
}
- pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
- if (unlikely(pbuf == NULL)) {
+ if (cb)
+ iowait_pio_inc(&priv->s_iowait);
+ pbuf = sc_buffer_alloc(sc, plen, cb, qp);
+ if (unlikely(!pbuf)) {
+ if (cb)
+ verbs_pio_complete(qp, 0);
if (ppd->host_link_state != HLS_UP_ACTIVE) {
/*
* If we have filled the PIO buffers to capacity and are
@@ -1174,7 +1038,12 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
* so lets continue to queue the request.
*/
hfi1_cdbg(PIO, "alloc failed. state active, queuing");
- return no_bufs_available(qp, sc);
+ ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
+ if (!ret)
+ /* txreq not queued - free */
+ goto bail;
+ /* tx consumed in wait */
+ return ret;
}
}
@@ -1182,7 +1051,7 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
} else {
if (ss) {
- seg_pio_copy_start(pbuf, pbc, hdr, hdrwords*4);
+ seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
while (len) {
void *addr = ss->sge.vaddr;
u32 slen = ss->sge.length;
@@ -1197,12 +1066,8 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
}
}
- trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-
- if (qp->s_rdma_mr) {
- hfi1_put_mr(qp->s_rdma_mr);
- qp->s_rdma_mr = NULL;
- }
+ trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+ &ps->s_txreq->phdr.hdr);
pio_bail:
if (qp->s_wqe) {
@@ -1211,10 +1076,15 @@ pio_bail:
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_rc_send_complete(qp, &ahdr->ibh);
+ hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- return 0;
+
+ ret = 0;
+
+bail:
+ hfi1_put_txreq(ps->s_txreq);
+ return ret;
}
/*
@@ -1247,13 +1117,14 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
*/
static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
struct hfi1_ib_header *hdr,
- struct hfi1_qp *qp)
+ struct rvt_qp *qp)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_other_headers *ohdr;
struct hfi1_devdata *dd;
int i = 0;
u16 pkey;
- u8 lnh, sc5 = qp->s_sc;
+ u8 lnh, sc5 = priv->s_sc;
if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
return 0;
@@ -1271,14 +1142,14 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
goto bad;
-
/* Is the pkey = 0x0, or 0x8000? */
if ((pkey & PKEY_LOW_15_MASK) == 0)
goto bad;
/* The most likely matching pkey has index qp->s_pkey_index */
if (unlikely(!egress_pkey_matches_entry(pkey,
- ppd->pkeys[qp->s_pkey_index]))) {
+ ppd->pkeys
+ [qp->s_pkey_index]))) {
/* no match - try the entire table */
for (; i < MAX_PKEY_VALUES; i++) {
if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
@@ -1302,31 +1173,65 @@ bad:
}
/**
+ * get_send_routine - choose an egress routine
+ *
+ * Choose an egress routine based on QP type
+ * and size
+ */
+static inline send_routine get_send_routine(struct rvt_qp *qp,
+ struct verbs_txreq *tx)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ib_header *h = &tx->phdr.hdr;
+
+ if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
+ return dd->process_pio_send;
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_SMI:
+ return dd->process_pio_send;
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ break;
+ case IB_QPT_RC:
+ if (piothreshold &&
+ qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+ (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
+ iowait_sdma_pending(&priv->s_iowait) == 0 &&
+ !sdma_txreq_built(&tx->txreq))
+ return dd->process_pio_send;
+ break;
+ case IB_QPT_UC:
+ if (piothreshold &&
+ qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+ (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+ iowait_sdma_pending(&priv->s_iowait) == 0 &&
+ !sdma_txreq_built(&tx->txreq))
+ return dd->process_pio_send;
+ break;
+ default:
+ break;
+ }
+ return dd->process_dma_send;
+}
+
+/**
* hfi1_verbs_send - send a packet
* @qp: the QP to send on
* @ps: the state of the packet to send
*
* Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
*/
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- struct ahg_ib_header *ahdr = qp->s_hdr;
+ struct hfi1_qp_priv *priv = qp->priv;
+ send_routine sr;
int ret;
- int pio = 0;
- unsigned long flags = 0;
-
- /*
- * VL15 packets (IB_QPT_SMI) will always use PIO, so we
- * can defer SDMA restart until link goes ACTIVE without
- * worrying about just how we got there.
- */
- if ((qp->ibqp.qp_type == IB_QPT_SMI) ||
- !(dd->flags & HFI1_HAS_SEND_DMA))
- pio = 1;
- ret = egress_pkey_check(dd->pport, &ahdr->ibh, qp);
+ sr = get_send_routine(qp, ps->s_txreq);
+ ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
if (unlikely(ret)) {
/*
* The value we are returning here does not get propagated to
@@ -1336,7 +1241,9 @@ int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
* mechanism for handling the errors. So for SDMA we can just
* return.
*/
- if (pio) {
+ if (sr == dd->process_pio_send) {
+ unsigned long flags;
+
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1345,71 +1252,57 @@ int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
}
return -EINVAL;
}
-
- if (pio) {
- ret = dd->process_pio_send(qp, ps, 0);
- } else {
-#ifdef CONFIG_SDMA_VERBOSITY
- dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n",
- slashstrip(__FILE__), __LINE__, __func__);
- dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", qp->s_hdrwords,
- qp->s_cur_size);
-#endif
- ret = dd->process_dma_send(qp, ps, 0);
- }
-
- return ret;
+ if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
+ return pio_wait(qp,
+ ps->s_txreq->psc,
+ ps,
+ RVT_S_WAIT_PIO_DRAIN);
+ return sr(qp, ps, 0);
}
-static int query_device(struct ib_device *ibdev,
- struct ib_device_attr *props,
- struct ib_udata *uhw)
+/**
+ * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- struct hfi1_ibdev *dev = to_idev(ibdev);
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
- memset(props, 0, sizeof(*props));
-
- props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
- IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
- IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-
- props->page_size_cap = PAGE_SIZE;
- props->vendor_id =
- dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
- props->vendor_part_id = dd->pcidev->device;
- props->hw_ver = dd->minrev;
- props->sys_image_guid = ib_hfi1_sys_image_guid;
- props->max_mr_size = ~0ULL;
- props->max_qp = hfi1_max_qps;
- props->max_qp_wr = hfi1_max_qp_wrs;
- props->max_sge = hfi1_max_sges;
- props->max_sge_rd = hfi1_max_sges;
- props->max_cq = hfi1_max_cqs;
- props->max_ah = hfi1_max_ahs;
- props->max_cqe = hfi1_max_cqes;
- props->max_mr = dev->lk_table.max;
- props->max_fmr = dev->lk_table.max;
- props->max_map_per_fmr = 32767;
- props->max_pd = hfi1_max_pds;
- props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
- props->max_qp_init_rd_atom = 255;
- /* props->max_res_rd_atom */
- props->max_srq = hfi1_max_srqs;
- props->max_srq_wr = hfi1_max_srq_wrs;
- props->max_srq_sge = hfi1_max_srq_sges;
- /* props->local_ca_ack_delay */
- props->atomic_cap = IB_ATOMIC_GLOB;
- props->max_pkeys = hfi1_get_npkeys(dd);
- props->max_mcast_grp = hfi1_max_mcast_grps;
- props->max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
- props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
- props->max_mcast_grp;
-
- return 0;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+
+ memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
+
+ rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+ IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ rdi->dparms.props.page_size_cap = PAGE_SIZE;
+ rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
+ rdi->dparms.props.vendor_part_id = dd->pcidev->device;
+ rdi->dparms.props.hw_ver = dd->minrev;
+ rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
+ rdi->dparms.props.max_mr_size = ~0ULL;
+ rdi->dparms.props.max_qp = hfi1_max_qps;
+ rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
+ rdi->dparms.props.max_sge = hfi1_max_sges;
+ rdi->dparms.props.max_sge_rd = hfi1_max_sges;
+ rdi->dparms.props.max_cq = hfi1_max_cqs;
+ rdi->dparms.props.max_ah = hfi1_max_ahs;
+ rdi->dparms.props.max_cqe = hfi1_max_cqes;
+ rdi->dparms.props.max_mr = rdi->lkey_table.max;
+ rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+ rdi->dparms.props.max_map_per_fmr = 32767;
+ rdi->dparms.props.max_pd = hfi1_max_pds;
+ rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
+ rdi->dparms.props.max_qp_init_rd_atom = 255;
+ rdi->dparms.props.max_srq = hfi1_max_srqs;
+ rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
+ rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
+ rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+ rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
+ rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
+ rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
+ rdi->dparms.props.max_total_mcast_qp_attach =
+ rdi->dparms.props.max_mcast_qp_attach *
+ rdi->dparms.props.max_mcast_grp;
}
static inline u16 opa_speed_to_ib(u16 in)
@@ -1443,33 +1336,24 @@ static inline u16 opa_width_to_ib(u16 in)
}
}
-static int query_port(struct ib_device *ibdev, u8 port,
+static int query_port(struct rvt_dev_info *rdi, u8 port_num,
struct ib_port_attr *props)
{
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- struct hfi1_ibport *ibp = to_iport(ibdev, port);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+ struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
u16 lid = ppd->lid;
- memset(props, 0, sizeof(*props));
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
- props->sm_lid = ibp->sm_lid;
- props->sm_sl = ibp->sm_sl;
/* OPA logical states match IB logical states */
props->state = driver_lstate(ppd);
props->phys_state = hfi1_ibphys_portstate(ppd);
- props->port_cap_flags = ibp->port_cap_flags;
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
- props->max_msg_sz = 0x80000000;
- props->pkey_tbl_len = hfi1_get_npkeys(dd);
- props->bad_pkey_cntr = ibp->pkey_violations;
- props->qkey_viol_cntr = ibp->qkey_violations;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
- props->init_type_reply = 0;
/* Once we are a "first class" citizen and have added the OPA MTUs to
* the core we can advertise the larger MTU enum to the ULPs, for now
@@ -1483,27 +1367,6 @@ static int query_port(struct ib_device *ibdev, u8 port,
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
- props->subnet_timeout = ibp->subnet_timeout;
-
- return 0;
-}
-
-static int port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- err = query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- memset(immutable, 0, sizeof(*immutable));
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
- immutable->max_mad_size = OPA_MGMT_MAD_SIZE;
return 0;
}
@@ -1547,102 +1410,31 @@ bail:
return ret;
}
-static int modify_port(struct ib_device *ibdev, u8 port,
- int port_modify_mask, struct ib_port_modify *props)
-{
- struct hfi1_ibport *ibp = to_iport(ibdev, port);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- int ret = 0;
-
- ibp->port_cap_flags |= props->set_port_cap_mask;
- ibp->port_cap_flags &= ~props->clr_port_cap_mask;
- if (props->set_port_cap_mask || props->clr_port_cap_mask)
- hfi1_cap_mask_chg(ibp);
- if (port_modify_mask & IB_PORT_SHUTDOWN) {
- set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
- OPA_LINKDOWN_REASON_UNKNOWN);
- ret = set_link_state(ppd, HLS_DN_DOWNDEF);
- }
- if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
- ibp->qkey_violations = 0;
- return ret;
-}
-
-static int query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
+static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
{
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- int ret = 0;
-
- if (!port || port > dd->num_pports)
- ret = -EINVAL;
- else {
- struct hfi1_ibport *ibp = to_iport(ibdev, port);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
- gid->global.subnet_prefix = ibp->gid_prefix;
- if (index == 0)
- gid->global.interface_id = cpu_to_be64(ppd->guid);
- else if (index < HFI1_GUIDS_PER_PORT)
- gid->global.interface_id = ibp->guids[index - 1];
- else
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-static struct ib_pd *alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct hfi1_ibdev *dev = to_idev(ibdev);
- struct hfi1_pd *pd;
- struct ib_pd *ret;
-
- /*
- * This is actually totally arbitrary. Some correctness tests
- * assume there's a maximum number of PDs that can be allocated.
- * We don't actually have this limit, but we fail the test if
- * we allow allocations of more than we report for this value.
- */
-
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- spin_lock(&dev->n_pds_lock);
- if (dev->n_pds_allocated == hfi1_max_pds) {
- spin_unlock(&dev->n_pds_lock);
- kfree(pd);
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- dev->n_pds_allocated++;
- spin_unlock(&dev->n_pds_lock);
-
- /* ib_alloc_pd() will initialize pd->ibpd. */
- pd->user = udata != NULL;
-
- ret = &pd->ibpd;
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+ struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+ int ret;
-bail:
+ set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
+ OPA_LINKDOWN_REASON_UNKNOWN);
+ ret = set_link_state(ppd, HLS_DN_DOWNDEF);
return ret;
}
-static int dealloc_pd(struct ib_pd *ibpd)
+static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+ int guid_index, __be64 *guid)
{
- struct hfi1_pd *pd = to_ipd(ibpd);
- struct hfi1_ibdev *dev = to_idev(ibpd->device);
-
- spin_lock(&dev->n_pds_lock);
- dev->n_pds_allocated--;
- spin_unlock(&dev->n_pds_lock);
+ struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- kfree(pd);
+ if (guid_index == 0)
+ *guid = cpu_to_be64(ppd->guid);
+ else if (guid_index < HFI1_GUIDS_PER_PORT)
+ *guid = ibp->guids[guid_index - 1];
+ else
+ return -EINVAL;
return 0;
}
@@ -1657,101 +1449,57 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
return ibp->sl_to_sc[ah->sl];
}
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd;
u8 sc5;
- /* A multicast address requires a GRH (see ch. 8.4.1). */
- if (ah_attr->dlid >= HFI1_MULTICAST_LID_BASE &&
- ah_attr->dlid != HFI1_PERMISSIVE_LID &&
- !(ah_attr->ah_flags & IB_AH_GRH))
- goto bail;
- if ((ah_attr->ah_flags & IB_AH_GRH) &&
- ah_attr->grh.sgid_index >= HFI1_GUIDS_PER_PORT)
- goto bail;
- if (ah_attr->dlid == 0)
- goto bail;
- if (ah_attr->port_num < 1 ||
- ah_attr->port_num > ibdev->phys_port_cnt)
- goto bail;
- if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
- ib_rate_to_mbps(ah_attr->static_rate) < 0)
- goto bail;
- if (ah_attr->sl >= OPA_MAX_SLS)
- goto bail;
/* test the mapping for validity */
ibp = to_iport(ibdev, ah_attr->port_num);
ppd = ppd_from_ibp(ibp);
sc5 = ibp->sl_to_sc[ah_attr->sl];
dd = dd_from_ppd(ppd);
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
- goto bail;
+ return -EINVAL;
return 0;
-bail:
- return -EINVAL;
}
-/**
- * create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+static void hfi1_notify_new_ah(struct ib_device *ibdev,
+ struct ib_ah_attr *ah_attr,
+ struct rvt_ah *ah)
{
- struct hfi1_ah *ah;
- struct ib_ah *ret;
- struct hfi1_ibdev *dev = to_idev(pd->device);
- unsigned long flags;
-
- if (hfi1_check_ah(pd->device, ah_attr)) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
-
- ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- spin_lock_irqsave(&dev->n_ahs_lock, flags);
- if (dev->n_ahs_allocated == hfi1_max_ahs) {
- spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
- kfree(ah);
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- dev->n_ahs_allocated++;
- spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
- /* ib_create_ah() will initialize ah->ibah. */
- ah->attr = *ah_attr;
- atomic_set(&ah->refcount, 0);
+ struct hfi1_ibport *ibp;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
+ u8 sc5;
- ret = &ah->ibah;
+ /*
+ * Do not trust reading anything from rvt_ah at this point as it is not
+ * done being setup. We can however modify things which we need to set.
+ */
-bail:
- return ret;
+ ibp = to_iport(ibdev, ah_attr->port_num);
+ ppd = ppd_from_ibp(ibp);
+ sc5 = ibp->sl_to_sc[ah->attr.sl];
+ dd = dd_from_ppd(ppd);
+ ah->vl = sc_to_vlt(dd, sc5);
+ if (ah->vl < num_vls || ah->vl == 15)
+ ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
}
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
struct ib_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
- struct hfi1_qp *qp0;
+ struct rvt_qp *qp0;
memset(&attr, 0, sizeof(attr));
attr.dlid = dlid;
attr.port_num = ppd_from_ibp(ibp)->port;
rcu_read_lock();
- qp0 = rcu_dereference(ibp->qp[0]);
+ qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
ah = ib_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
@@ -1759,51 +1507,6 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
}
/**
- * destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int destroy_ah(struct ib_ah *ibah)
-{
- struct hfi1_ibdev *dev = to_idev(ibah->device);
- struct hfi1_ah *ah = to_iah(ibah);
- unsigned long flags;
-
- if (atomic_read(&ah->refcount) != 0)
- return -EBUSY;
-
- spin_lock_irqsave(&dev->n_ahs_lock, flags);
- dev->n_ahs_allocated--;
- spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
- kfree(ah);
-
- return 0;
-}
-
-static int modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
- struct hfi1_ah *ah = to_iah(ibah);
-
- if (hfi1_check_ah(ibah->device, ah_attr))
- return -EINVAL;
-
- ah->attr = *ah_attr;
-
- return 0;
-}
-
-static int query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
- struct hfi1_ah *ah = to_iah(ibah);
-
- *ah_attr = ah->attr;
-
- return 0;
-}
-
-/**
* hfi1_get_npkeys - return the size of the PKEY table for context 0
* @dd: the hfi1_ib device
*/
@@ -1812,54 +1515,6 @@ unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
return ARRAY_SIZE(dd->pport[0].pkeys);
}
-static int query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- int ret;
-
- if (index >= hfi1_get_npkeys(dd)) {
- ret = -EINVAL;
- goto bail;
- }
-
- *pkey = hfi1_get_pkey(to_iport(ibdev, port), index);
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the driver
- */
-
-static struct ib_ucontext *alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
-{
- struct hfi1_ucontext *context;
- struct ib_ucontext *ret;
-
- context = kmalloc(sizeof(*context), GFP_KERNEL);
- if (!context) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- ret = &context->ibucontext;
-
-bail:
- return ret;
-}
-
-static int dealloc_ucontext(struct ib_ucontext *context)
-{
- kfree(to_iucontext(context));
- return 0;
-}
-
static void init_ibport(struct hfi1_pportdata *ppd)
{
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -1871,28 +1526,21 @@ static void init_ibport(struct hfi1_pportdata *ppd)
ibp->sc_to_sl[i] = i;
}
- spin_lock_init(&ibp->lock);
+ spin_lock_init(&ibp->rvp.lock);
/* Set the prefix to the default value (see ch. 4.1.1) */
- ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
- ibp->sm_lid = 0;
+ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+ ibp->rvp.sm_lid = 0;
/* Below should only set bits defined in OPA PortInfo.CapabilityMask */
- ibp->port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
+ ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
IB_PORT_CAP_MASK_NOTICE_SUP;
- ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
- ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
- ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
- ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
- ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
- RCU_INIT_POINTER(ibp->qp[0], NULL);
- RCU_INIT_POINTER(ibp->qp[1], NULL);
-}
-
-static void verbs_txreq_kmem_cache_ctor(void *obj)
-{
- struct verbs_txreq *tx = obj;
-
- memset(tx, 0, sizeof(*tx));
+ ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+ ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+ ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+ ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+ ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+ RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+ RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
/**
@@ -1903,74 +1551,26 @@ static void verbs_txreq_kmem_cache_ctor(void *obj)
int hfi1_register_ib_device(struct hfi1_devdata *dd)
{
struct hfi1_ibdev *dev = &dd->verbs_dev;
- struct ib_device *ibdev = &dev->ibdev;
+ struct ib_device *ibdev = &dev->rdi.ibdev;
struct hfi1_pportdata *ppd = dd->pport;
- unsigned i, lk_tab_size;
+ unsigned i;
int ret;
size_t lcpysz = IB_DEVICE_NAME_MAX;
- u16 descq_cnt;
- char buf[TXREQ_NAME_LEN];
-
- ret = hfi1_qp_init(dev);
- if (ret)
- goto err_qp_init;
-
for (i = 0; i < dd->num_pports; i++)
init_ibport(ppd + i);
/* Only need to initialize non-zero fields. */
- spin_lock_init(&dev->n_pds_lock);
- spin_lock_init(&dev->n_ahs_lock);
- spin_lock_init(&dev->n_cqs_lock);
- spin_lock_init(&dev->n_qps_lock);
- spin_lock_init(&dev->n_srqs_lock);
- spin_lock_init(&dev->n_mcast_grps_lock);
+
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
- /*
- * The top hfi1_lkey_table_size bits are used to index the
- * table. The lower 8 bits can be owned by the user (copied from
- * the LKEY). The remaining bits act as a generation number or tag.
- */
- spin_lock_init(&dev->lk_table.lock);
- dev->lk_table.max = 1 << hfi1_lkey_table_size;
- /* ensure generation is at least 4 bits (keys.c) */
- if (hfi1_lkey_table_size > MAX_LKEY_TABLE_BITS) {
- dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
- hfi1_lkey_table_size, MAX_LKEY_TABLE_BITS);
- hfi1_lkey_table_size = MAX_LKEY_TABLE_BITS;
- }
- lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
- dev->lk_table.table = (struct hfi1_mregion __rcu **)
- vmalloc(lk_tab_size);
- if (dev->lk_table.table == NULL) {
- ret = -ENOMEM;
- goto err_lk;
- }
- RCU_INIT_POINTER(dev->dma_mr, NULL);
- for (i = 0; i < dev->lk_table.max; i++)
- RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
- INIT_LIST_HEAD(&dev->pending_mmaps);
- spin_lock_init(&dev->pending_lock);
seqlock_init(&dev->iowait_lock);
- dev->mmap_offset = PAGE_SIZE;
- spin_lock_init(&dev->mmap_offset_lock);
INIT_LIST_HEAD(&dev->txwait);
INIT_LIST_HEAD(&dev->memwait);
- descq_cnt = sdma_get_descq_cnt();
-
- snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
- /* SLAB_HWCACHE_ALIGN for AHG */
- dev->verbs_txreq_cache = kmem_cache_create(buf,
- sizeof(struct verbs_txreq),
- 0, SLAB_HWCACHE_ALIGN,
- verbs_txreq_kmem_cache_ctor);
- if (!dev->verbs_txreq_cache) {
- ret = -ENOMEM;
+ ret = verbs_txreq_init(dev);
+ if (ret)
goto err_verbs_txreq;
- }
/*
* The system image GUID is supposed to be the same for all
@@ -1983,142 +1583,119 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->node_guid = cpu_to_be64(ppd->guid);
- ibdev->uverbs_abi_ver = HFI1_UVERBS_ABI_VERSION;
- ibdev->uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
- (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
- ibdev->node_type = RDMA_NODE_IB_CA;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->num_comp_vectors = 1;
ibdev->dma_device = &dd->pcidev->dev;
- ibdev->query_device = query_device;
ibdev->modify_device = modify_device;
- ibdev->query_port = query_port;
- ibdev->modify_port = modify_port;
- ibdev->query_pkey = query_pkey;
- ibdev->query_gid = query_gid;
- ibdev->alloc_ucontext = alloc_ucontext;
- ibdev->dealloc_ucontext = dealloc_ucontext;
- ibdev->alloc_pd = alloc_pd;
- ibdev->dealloc_pd = dealloc_pd;
- ibdev->create_ah = create_ah;
- ibdev->destroy_ah = destroy_ah;
- ibdev->modify_ah = modify_ah;
- ibdev->query_ah = query_ah;
- ibdev->create_srq = hfi1_create_srq;
- ibdev->modify_srq = hfi1_modify_srq;
- ibdev->query_srq = hfi1_query_srq;
- ibdev->destroy_srq = hfi1_destroy_srq;
- ibdev->create_qp = hfi1_create_qp;
- ibdev->modify_qp = hfi1_modify_qp;
- ibdev->query_qp = hfi1_query_qp;
- ibdev->destroy_qp = hfi1_destroy_qp;
- ibdev->post_send = post_send;
- ibdev->post_recv = post_receive;
- ibdev->post_srq_recv = hfi1_post_srq_receive;
- ibdev->create_cq = hfi1_create_cq;
- ibdev->destroy_cq = hfi1_destroy_cq;
- ibdev->resize_cq = hfi1_resize_cq;
- ibdev->poll_cq = hfi1_poll_cq;
- ibdev->req_notify_cq = hfi1_req_notify_cq;
- ibdev->get_dma_mr = hfi1_get_dma_mr;
- ibdev->reg_user_mr = hfi1_reg_user_mr;
- ibdev->dereg_mr = hfi1_dereg_mr;
- ibdev->alloc_mr = hfi1_alloc_mr;
- ibdev->alloc_fmr = hfi1_alloc_fmr;
- ibdev->map_phys_fmr = hfi1_map_phys_fmr;
- ibdev->unmap_fmr = hfi1_unmap_fmr;
- ibdev->dealloc_fmr = hfi1_dealloc_fmr;
- ibdev->attach_mcast = hfi1_multicast_attach;
- ibdev->detach_mcast = hfi1_multicast_detach;
+
+ /* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
- ibdev->mmap = hfi1_mmap;
- ibdev->dma_ops = &hfi1_dma_mapping_ops;
- ibdev->get_port_immutable = port_immutable;
strncpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
- ret = ib_register_device(ibdev, hfi1_create_port_files);
- if (ret)
- goto err_reg;
-
- ret = hfi1_create_agents(dev);
+ /*
+ * Fill in rvt info object.
+ */
+ dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
+ dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
+ dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
+ dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
+ dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
+ dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be;
+ dd->verbs_dev.rdi.driver_f.query_port_state = query_port;
+ dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port;
+ dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg;
+ /*
+ * Fill in rvt info device attributes.
+ */
+ hfi1_fill_device_attr(dd);
+
+ /* queue pair */
+ dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size;
+ dd->verbs_dev.rdi.dparms.qpn_start = 0;
+ dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+ dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
+ dd->verbs_dev.rdi.dparms.qpn_res_end =
+ dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
+ dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
+ dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
+ dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
+ dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
+ dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
+
+ dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+ dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
+ dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
+ dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+ dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
+ dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
+ dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
+ dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
+ dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
+ dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
+ dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+ dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+
+ /* completeion queue */
+ snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+ sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+ "hfi1_cq%d", dd->unit);
+ dd->verbs_dev.rdi.dparms.node = dd->node;
+
+ /* misc settings */
+ dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */
+ dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
+ dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+ dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+
+ ppd = dd->pport;
+ for (i = 0; i < dd->num_pports; i++, ppd++)
+ rvt_init_port(&dd->verbs_dev.rdi,
+ &ppd->ibport_data.rvp,
+ i,
+ ppd->pkeys);
+
+ ret = rvt_register_device(&dd->verbs_dev.rdi);
if (ret)
- goto err_agents;
+ goto err_verbs_txreq;
ret = hfi1_verbs_register_sysfs(dd);
if (ret)
goto err_class;
- goto bail;
+ return ret;
err_class:
- hfi1_free_agents(dev);
-err_agents:
- ib_unregister_device(ibdev);
-err_reg:
+ rvt_unregister_device(&dd->verbs_dev.rdi);
err_verbs_txreq:
- kmem_cache_destroy(dev->verbs_txreq_cache);
- vfree(dev->lk_table.table);
-err_lk:
- hfi1_qp_exit(dev);
-err_qp_init:
+ verbs_txreq_exit(dev);
dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
return ret;
}
void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
{
struct hfi1_ibdev *dev = &dd->verbs_dev;
- struct ib_device *ibdev = &dev->ibdev;
hfi1_verbs_unregister_sysfs(dd);
- hfi1_free_agents(dev);
-
- ib_unregister_device(ibdev);
+ rvt_unregister_device(&dd->verbs_dev.rdi);
if (!list_empty(&dev->txwait))
dd_dev_err(dd, "txwait list not empty!\n");
if (!list_empty(&dev->memwait))
dd_dev_err(dd, "memwait list not empty!\n");
- if (dev->dma_mr)
- dd_dev_err(dd, "DMA MR not NULL!\n");
- hfi1_qp_exit(dev);
del_timer_sync(&dev->mem_timer);
- kmem_cache_destroy(dev->verbs_txreq_cache);
- vfree(dev->lk_table.table);
+ verbs_txreq_exit(dev);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -2126,7 +1703,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_ib_header *hdr = packet->hdr;
- struct hfi1_qp *qp = packet->qp;
+ struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
u8 sl, sc5, sc4_bit, svc_type;
@@ -2149,7 +1726,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
svc_type = IB_CC_SVCTYPE_UD;
break;
default:
- ibp->n_pkt_drops++;
+ ibp->rvp.n_pkt_drops++;
return;
}
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 286e468b0479..6c4670fffdbb 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -59,9 +56,13 @@
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/completion.h>
+#include <linux/slab.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
+#include <rdma/rdmavt_cq.h>
struct hfi1_ctxtdata;
struct hfi1_pportdata;
@@ -79,12 +80,6 @@ struct hfi1_packet;
*/
#define HFI1_UVERBS_ABI_VERSION 2
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
-
#define IB_SEQ_NAK (3 << 29)
/* AETH NAK opcode values */
@@ -95,17 +90,6 @@ struct hfi1_packet;
#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
#define IB_NAK_INVALID_RD_REQUEST 0x64
-/* Flags for checking QP state (see ib_hfi1_state_ops[]) */
-#define HFI1_POST_SEND_OK 0x01
-#define HFI1_POST_RECV_OK 0x02
-#define HFI1_PROCESS_RECV_OK 0x04
-#define HFI1_PROCESS_SEND_OK 0x08
-#define HFI1_PROCESS_NEXT_SEND_OK 0x10
-#define HFI1_FLUSH_SEND 0x20
-#define HFI1_FLUSH_RECV 0x40
-#define HFI1_PROCESS_OR_FLUSH_SEND \
- (HFI1_PROCESS_SEND_OK | HFI1_FLUSH_SEND)
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -208,341 +192,18 @@ struct hfi1_pio_header {
} __packed;
/*
- * used for force cacheline alignment for AHG
- */
-struct tx_pio_header {
- struct hfi1_pio_header phdr;
-} ____cacheline_aligned;
-
-/*
- * There is one struct hfi1_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct hfi1_mcast_qp.
- */
-struct hfi1_mcast_qp {
- struct list_head list;
- struct hfi1_qp *qp;
-};
-
-struct hfi1_mcast {
- struct rb_node rb_node;
- union ib_gid mgid;
- struct list_head qp_list;
- wait_queue_head_t wait;
- atomic_t refcount;
- int n_attached;
-};
-
-/* Protection domain */
-struct hfi1_pd {
- struct ib_pd ibpd;
- int user; /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct hfi1_ah {
- struct ib_ah ibah;
- struct ib_ah_attr attr;
- atomic_t refcount;
-};
-
-/*
- * This structure is used by hfi1_mmap() to validate an offset
- * when an mmap() request is made. The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct hfi1_mmap_info {
- struct list_head pending_mmaps;
- struct ib_ucontext *context;
- void *obj;
- __u64 offset;
- struct kref ref;
- unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct hfi1_cq_wc {
- u32 head; /* index of next entry to fill */
- u32 tail; /* index of next ib_poll_cq() entry */
- union {
- /* these are actually size ibcq.cqe + 1 */
- struct ib_uverbs_wc uqueue[0];
- struct ib_wc kqueue[0];
- };
-};
-
-/*
- * The completion queue structure.
- */
-struct hfi1_cq {
- struct ib_cq ibcq;
- struct kthread_work comptask;
- struct hfi1_devdata *dd;
- spinlock_t lock; /* protect changes in this struct */
- u8 notify;
- u8 triggered;
- struct hfi1_cq_wc *queue;
- struct hfi1_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * Used by the verbs layer.
- */
-struct hfi1_seg {
- void *vaddr;
- size_t length;
-};
-
-/* The number of hfi1_segs that fit in a page. */
-#define HFI1_SEGSZ (PAGE_SIZE / sizeof(struct hfi1_seg))
-
-struct hfi1_segarray {
- struct hfi1_seg segs[HFI1_SEGSZ];
-};
-
-struct hfi1_mregion {
- struct ib_pd *pd; /* shares refcnt of ibmr.pd */
- u64 user_base; /* User's address for this region */
- u64 iova; /* IB start address of this region */
- size_t length;
- u32 lkey;
- u32 offset; /* offset (bytes) to start of region */
- int access_flags;
- u32 max_segs; /* number of hfi1_segs in all the arrays */
- u32 mapsz; /* size of the map array */
- u8 page_shift; /* 0 - non unform/non powerof2 sizes */
- u8 lkey_published; /* in global table */
- struct completion comp; /* complete when refcount goes to zero */
- atomic_t refcount;
- struct hfi1_segarray *map[0]; /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct hfi1_sge {
- struct hfi1_mregion *mr;
- void *vaddr; /* kernel virtual address of segment */
- u32 sge_length; /* length of the SGE */
- u32 length; /* remaining length of the segment */
- u16 m; /* current index: mr->map[m] */
- u16 n; /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct hfi1_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct hfi1_mregion mr; /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct hfi1_swqe {
- union {
- struct ib_send_wr wr; /* don't use wr.sg_list */
- struct ib_rdma_wr rdma_wr;
- struct ib_atomic_wr atomic_wr;
- struct ib_ud_wr ud_wr;
- };
- u32 psn; /* first packet sequence number */
- u32 lpsn; /* last packet sequence number */
- u32 ssn; /* send sequence number */
- u32 length; /* total length of data in sg_list */
- struct hfi1_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct hfi1_rwqe {
- u64 wr_id;
- u8 num_sge;
- struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
+ * hfi1 specific data structures that will be hidden from rvt after the queue
+ * pair is made common
*/
-struct hfi1_rwq {
- u32 head; /* new work requests posted to the head */
- u32 tail; /* receives pull requests from here. */
- struct hfi1_rwqe wq[0];
-};
-
-struct hfi1_rq {
- struct hfi1_rwq *wq;
- u32 size; /* size of RWQE array */
- u8 max_sge;
- /* protect changes in this struct */
- spinlock_t lock ____cacheline_aligned_in_smp;
-};
-
-struct hfi1_srq {
- struct ib_srq ibsrq;
- struct hfi1_rq rq;
- struct hfi1_mmap_info *ip;
- /* send signal when number of RWQEs < limit */
- u32 limit;
-};
-
-struct hfi1_sge_state {
- struct hfi1_sge *sg_list; /* next SGE to be used if any */
- struct hfi1_sge sge; /* progress state for the current SGE */
- u32 total_len;
- u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct hfi1_ack_entry {
- u8 opcode;
- u8 sent;
- u32 psn;
- u32 lpsn;
- union {
- struct hfi1_sge rdma_sge;
- u64 atomic_data;
- };
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct hfi1_qp {
- struct ib_qp ibqp;
- /* read mostly fields above and below */
- struct ib_ah_attr remote_ah_attr;
- struct ib_ah_attr alt_ah_attr;
- struct hfi1_qp __rcu *next; /* link list for QPN hash table */
- struct hfi1_swqe *s_wq; /* send work queue */
- struct hfi1_mmap_info *ip;
- struct ahg_ib_header *s_hdr; /* next packet header to send */
- /* sc for UC/RC QPs - based on ah for UD */
- u8 s_sc;
- unsigned long timeout_jiffies; /* computed from timeout */
-
- enum ib_mtu path_mtu;
- int srate_mbps; /* s_srate (below) converted to Mbit/s */
- u32 remote_qpn;
- u32 pmtu; /* decoded from path_mtu */
- u32 qkey; /* QKEY for this QP (for UD or RD) */
- u32 s_size; /* send work queue size */
- u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
- u32 s_ahgpsn; /* set to the psn in the copy of the header */
-
- u8 state; /* QP state */
- u8 allowed_ops; /* high order bits of allowed opcodes */
- u8 qp_access_flags;
- u8 alt_timeout; /* Alternate path timeout for this QP */
- u8 timeout; /* Timeout for this QP */
- u8 s_srate;
- u8 s_mig_state;
- u8 port_num;
- u8 s_pkey_index; /* PKEY index to use */
- u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
- u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
- u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
- u8 s_retry_cnt; /* number of times to retry */
- u8 s_rnr_retry_cnt;
- u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 s_max_sge; /* size of s_wq->sg_list */
- u8 s_draining;
-
- /* start of read/write fields */
- atomic_t refcount ____cacheline_aligned_in_smp;
- wait_queue_head_t wait;
-
-
- struct hfi1_ack_entry s_ack_queue[HFI1_MAX_RDMA_ATOMIC + 1]
- ____cacheline_aligned_in_smp;
- struct hfi1_sge_state s_rdma_read_sge;
-
- spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
- unsigned long r_aflags;
- u64 r_wr_id; /* ID for current receive WQE */
- u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
- u32 r_len; /* total length of r_sge */
- u32 r_rcv_len; /* receive data len processed */
- u32 r_psn; /* expected rcv packet sequence number */
- u32 r_msn; /* message sequence number */
-
- u8 r_adefered; /* number of acks defered */
- u8 r_state; /* opcode of last packet received */
- u8 r_flags;
- u8 r_head_ack_queue; /* index into s_ack_queue[] */
-
- struct list_head rspwait; /* link for waiting to respond */
-
- struct hfi1_sge_state r_sge; /* current receive data */
- struct hfi1_rq r_rq; /* receive work queue */
-
- spinlock_t s_lock ____cacheline_aligned_in_smp;
- struct hfi1_sge_state *s_cur_sge;
- u32 s_flags;
- struct hfi1_swqe *s_wqe;
- struct hfi1_sge_state s_sge; /* current send request data */
- struct hfi1_mregion *s_rdma_mr;
- struct sdma_engine *s_sde; /* current sde */
- u32 s_cur_size; /* size of send packet in bytes */
- u32 s_len; /* total length of s_sge */
- u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
- u32 s_next_psn; /* PSN for next request */
- u32 s_last_psn; /* last response PSN processed */
- u32 s_sending_psn; /* lowest PSN that is being sent */
- u32 s_sending_hpsn; /* highest PSN that is being sent */
- u32 s_psn; /* current packet sequence number */
- u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
- u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
- u32 s_head; /* new entries added here */
- u32 s_tail; /* next entry to process */
- u32 s_cur; /* current work queue entry */
- u32 s_acked; /* last un-ACK'ed entry */
- u32 s_last; /* last completed entry */
- u32 s_ssn; /* SSN of tail entry */
- u32 s_lsn; /* limit sequence number (credit) */
- u16 s_hdrwords; /* size of s_hdr in 32 bit words */
- u16 s_rdma_ack_cnt;
- s8 s_ahgidx;
- u8 s_state; /* opcode of last packet sent */
- u8 s_ack_state; /* opcode of packet to ACK */
- u8 s_nak_state; /* non-zero if NAK is pending */
- u8 r_nak_state; /* non-zero if NAK is pending */
- u8 s_retry; /* requester retry counter */
- u8 s_rnr_retry; /* requester RNR retry counter */
- u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
- u8 s_tail_ack_queue; /* index into s_ack_queue[] */
-
- struct hfi1_sge_state s_ack_rdma_sge;
- struct timer_list s_timer;
-
+struct hfi1_qp_priv {
+ struct ahg_ib_header *s_hdr; /* next header to send */
+ struct sdma_engine *s_sde; /* current sde */
+ struct send_context *s_sendcontext; /* current sendcontext */
+ u8 s_sc; /* SC[0..4] for next packet */
+ u8 r_adefered; /* number of acks defered */
struct iowait s_iowait;
-
- struct hfi1_sge r_sg_list[0] /* verified SGEs */
- ____cacheline_aligned_in_smp;
+ struct timer_list s_rnr_timer;
+ struct rvt_qp *owner;
};
/*
@@ -553,123 +214,11 @@ struct hfi1_pkt_state {
struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
+ struct verbs_txreq *s_txreq;
};
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define HFI1_R_WRID_VALID 0
-#define HFI1_R_REWIND_SGE 1
-
-/*
- * Bit definitions for r_flags.
- */
-#define HFI1_R_REUSE_SGE 0x01
-#define HFI1_R_RDMAR_SEQ 0x02
-/* defer ack until end of interrupt session */
-#define HFI1_R_RSP_DEFERED_ACK 0x04
-/* relay ack to send engine */
-#define HFI1_R_RSP_SEND 0x08
-#define HFI1_R_COMM_EST 0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * HFI1_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * HFI1_S_BUSY - send tasklet is processing the QP
- * HFI1_S_TIMER - the RC retry timer is active
- * HFI1_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * HFI1_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- * before processing the next SWQE
- * HFI1_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- * before processing the next SWQE
- * HFI1_S_WAIT_RNR - waiting for RNR timeout
- * HFI1_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * HFI1_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- * next send completion entry not via send DMA
- * HFI1_S_WAIT_PIO - waiting for a send buffer to be available
- * HFI1_S_WAIT_TX - waiting for a struct verbs_txreq to be available
- * HFI1_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * HFI1_S_WAIT_KMEM - waiting for kernel memory to be available
- * HFI1_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * HFI1_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * HFI1_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- * HFI1_S_ECN - a BECN was queued to the send engine
- */
-#define HFI1_S_SIGNAL_REQ_WR 0x0001
-#define HFI1_S_BUSY 0x0002
-#define HFI1_S_TIMER 0x0004
-#define HFI1_S_RESP_PENDING 0x0008
-#define HFI1_S_ACK_PENDING 0x0010
-#define HFI1_S_WAIT_FENCE 0x0020
-#define HFI1_S_WAIT_RDMAR 0x0040
-#define HFI1_S_WAIT_RNR 0x0080
-#define HFI1_S_WAIT_SSN_CREDIT 0x0100
-#define HFI1_S_WAIT_DMA 0x0200
-#define HFI1_S_WAIT_PIO 0x0400
-#define HFI1_S_WAIT_TX 0x0800
-#define HFI1_S_WAIT_DMA_DESC 0x1000
-#define HFI1_S_WAIT_KMEM 0x2000
-#define HFI1_S_WAIT_PSN 0x4000
-#define HFI1_S_WAIT_ACK 0x8000
-#define HFI1_S_SEND_ONE 0x10000
-#define HFI1_S_UNLIMITED_CREDIT 0x20000
-#define HFI1_S_AHG_VALID 0x40000
-#define HFI1_S_AHG_CLEAR 0x80000
-#define HFI1_S_ECN 0x100000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define HFI1_S_ANY_WAIT_IO (HFI1_S_WAIT_PIO | HFI1_S_WAIT_TX | \
- HFI1_S_WAIT_DMA_DESC | HFI1_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define HFI1_S_ANY_WAIT_SEND (HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | \
- HFI1_S_WAIT_RNR | HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_DMA | \
- HFI1_S_WAIT_PSN | HFI1_S_WAIT_ACK)
-
-#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | HFI1_S_ANY_WAIT_SEND)
-
#define HFI1_PSN_CREDIT 16
-/*
- * Since struct hfi1_swqe is not a fixed size, we can't simply index into
- * struct hfi1_qp.s_wq. This function does the array index computation.
- */
-static inline struct hfi1_swqe *get_swqe_ptr(struct hfi1_qp *qp,
- unsigned n)
-{
- return (struct hfi1_swqe *)((char *)qp->s_wq +
- (sizeof(struct hfi1_swqe) +
- qp->s_max_sge *
- sizeof(struct hfi1_sge)) * n);
-}
-
-/*
- * Since struct hfi1_rwqe is not a fixed size, we can't simply index into
- * struct hfi1_rwq.wq. This function does the array index computation.
- */
-static inline struct hfi1_rwqe *get_rwqe_ptr(struct hfi1_rq *rq, unsigned n)
-{
- return (struct hfi1_rwqe *)
- ((char *) rq->wq->wq +
- (sizeof(struct hfi1_rwqe) +
- rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct hfi1_lkey_table {
- spinlock_t lock; /* protect changes in this struct */
- u32 next; /* next unused index (speeds search) */
- u32 gen; /* generation count */
- u32 max; /* size of the table */
- struct hfi1_mregion __rcu **table;
-};
-
struct hfi1_opcode_stats {
u64 n_packets; /* number of packets */
u64 n_bytes; /* total number of bytes */
@@ -690,75 +239,20 @@ static inline void inc_opstats(
}
struct hfi1_ibport {
- struct hfi1_qp __rcu *qp[2];
- struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
- struct hfi1_ah *sm_ah;
- struct hfi1_ah *smi_ah;
- struct rb_root mcast_tree;
- spinlock_t lock; /* protect changes in this struct */
-
- /* non-zero when timer is set */
- unsigned long mkey_lease_timeout;
- unsigned long trap_timeout;
- __be64 gid_prefix; /* in network order */
- __be64 mkey;
+ struct rvt_qp __rcu *qp[2];
+ struct rvt_ibport rvp;
+
__be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */
- u64 tid; /* TID for traps */
- u64 n_rc_resends;
- u64 n_seq_naks;
- u64 n_rdma_seq;
- u64 n_rnr_naks;
- u64 n_other_naks;
- u64 n_loop_pkts;
- u64 n_pkt_drops;
- u64 n_vl15_dropped;
- u64 n_rc_timeouts;
- u64 n_dmawait;
- u64 n_unaligned;
- u64 n_rc_dupreq;
- u64 n_rc_seqnak;
-
- /* Hot-path per CPU counters to avoid cacheline trading to update */
- u64 z_rc_acks;
- u64 z_rc_qacks;
- u64 z_rc_delayed_comp;
- u64 __percpu *rc_acks;
- u64 __percpu *rc_qacks;
- u64 __percpu *rc_delayed_comp;
-
- u32 port_cap_flags;
- u32 pma_sample_start;
- u32 pma_sample_interval;
- __be16 pma_counter_select[5];
- u16 pma_tag;
- u16 pkey_violations;
- u16 qkey_violations;
- u16 mkey_violations;
- u16 mkey_lease_period;
- u16 sm_lid;
- u16 repress_traps;
- u8 sm_sl;
- u8 mkeyprot;
- u8 subnet_timeout;
- u8 vl_high_limit;
+
/* the first 16 entries are sl_to_vl for !OPA */
u8 sl_to_sc[32];
u8 sc_to_sl[32];
};
-
-struct hfi1_qp_ibdev;
struct hfi1_ibdev {
- struct ib_device ibdev;
- struct list_head pending_mmaps;
- spinlock_t mmap_offset_lock; /* protect mmap_offset */
- u32 mmap_offset;
- struct hfi1_mregion __rcu *dma_mr;
-
- struct hfi1_qp_ibdev *qp_dev;
+ struct rvt_dev_info rdi; /* Must be first */
/* QP numbers are shared by all IB ports */
- struct hfi1_lkey_table lk_table;
/* protect wait lists */
seqlock_t iowait_lock;
struct list_head txwait; /* list for wait verbs_txreq */
@@ -767,26 +261,11 @@ struct hfi1_ibdev {
struct kmem_cache *verbs_txreq_cache;
struct timer_list mem_timer;
- /* other waiters */
- spinlock_t pending_lock;
-
u64 n_piowait;
+ u64 n_piodrain;
u64 n_txwait;
u64 n_kmem_wait;
- u64 n_send_schedule;
-
- u32 n_pds_allocated; /* number of PDs allocated for device */
- spinlock_t n_pds_lock;
- u32 n_ahs_allocated; /* number of AHs allocated for device */
- spinlock_t n_ahs_lock;
- u32 n_cqs_allocated; /* number of CQs allocated for device */
- spinlock_t n_cqs_lock;
- u32 n_qps_allocated; /* number of QPs allocated for device */
- spinlock_t n_qps_lock;
- u32 n_srqs_allocated; /* number of SRQs allocated for device */
- spinlock_t n_srqs_lock;
- u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
- spinlock_t n_mcast_grps_lock;
+
#ifdef CONFIG_DEBUG_FS
/* per HFI debugfs */
struct dentry *hfi1_ibdev_dbg;
@@ -795,66 +274,31 @@ struct hfi1_ibdev {
#endif
};
-struct hfi1_verbs_counters {
- u64 symbol_error_counter;
- u64 link_error_recovery_counter;
- u64 link_downed_counter;
- u64 port_rcv_errors;
- u64 port_rcv_remphys_errors;
- u64 port_xmit_discards;
- u64 port_xmit_data;
- u64 port_rcv_data;
- u64 port_xmit_packets;
- u64 port_rcv_packets;
- u32 local_link_integrity_errors;
- u32 excessive_buffer_overrun_errors;
- u32 vl15_dropped;
-};
-
-static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct hfi1_mr, ibmr);
-}
-
-static inline struct hfi1_pd *to_ipd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct hfi1_pd, ibpd);
-}
-
-static inline struct hfi1_ah *to_iah(struct ib_ah *ibah)
-{
- return container_of(ibah, struct hfi1_ah, ibah);
-}
-
-static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq)
+static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
{
- return container_of(ibcq, struct hfi1_cq, ibcq);
-}
+ struct rvt_dev_info *rdi;
-static inline struct hfi1_srq *to_isrq(struct ib_srq *ibsrq)
-{
- return container_of(ibsrq, struct hfi1_srq, ibsrq);
+ rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+ return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct hfi1_qp *to_iqp(struct ib_qp *ibqp)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
- return container_of(ibqp, struct hfi1_qp, ibqp);
-}
+ struct hfi1_qp_priv *priv;
-static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct hfi1_ibdev, ibdev);
+ priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
+ return priv->owner;
}
/*
* Send if not busy or waiting for I/O and either
* a RC response is pending or we can process send work requests.
*/
-static inline int hfi1_send_ok(struct hfi1_qp *qp)
+static inline int hfi1_send_ok(struct rvt_qp *qp)
{
- return !(qp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
- (qp->s_hdrwords || (qp->s_flags & HFI1_S_RESP_PENDING) ||
- !(qp->s_flags & HFI1_S_ANY_WAIT_SEND));
+ return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+ (qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
/*
@@ -862,7 +306,7 @@ static inline int hfi1_send_ok(struct hfi1_qp *qp)
*/
void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
u32 qp1, u32 qp2, u16 lid1, u16 lid2);
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp);
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
@@ -870,8 +314,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_mad_hdr *in_mad, size_t in_mad_size,
struct ib_mad_hdr *out_mad, size_t *out_mad_size,
u16 *out_mad_pkey_index);
-int hfi1_create_agents(struct hfi1_ibdev *dev);
-void hfi1_free_agents(struct hfi1_ibdev *dev);
/*
* The PSN_MASK and PSN_SHIFT allow for
@@ -901,7 +343,7 @@ void hfi1_free_agents(struct hfi1_ibdev *dev);
*/
static inline int cmp_msn(u32 a, u32 b)
{
- return (((int) a) - ((int) b)) << 8;
+ return (((int)a) - ((int)b)) << 8;
}
/*
@@ -910,7 +352,7 @@ static inline int cmp_msn(u32 a, u32 b)
*/
static inline int cmp_psn(u32 a, u32 b)
{
- return (((int) a) - ((int) b)) << PSN_SHIFT;
+ return (((int)a) - ((int)b)) << PSN_SHIFT;
}
/*
@@ -929,23 +371,15 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid);
-
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp);
-
struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps);
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-void hfi1_copy_sge(struct hfi1_sge_state *ss, void *data, u32 length,
- int release);
+void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
+ int release, int copy_last);
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release);
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
void hfi1_cnp_rcv(struct hfi1_packet *packet);
@@ -957,147 +391,75 @@ void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
struct hfi1_ib_header *hdr,
u32 rcv_flags,
- struct hfi1_qp *qp);
+ struct rvt_qp *qp);
u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
-
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
void hfi1_rc_rnr_retry(unsigned long arg);
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
+void hfi1_rc_timeout(unsigned long arg);
+void hfi1_del_timers_sync(struct rvt_qp *qp);
+void hfi1_stop_rc_timers(struct rvt_qp *qp);
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err);
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
void hfi1_ud_rcv(struct hfi1_packet *packet);
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region);
-
-void hfi1_free_lkey(struct hfi1_mregion *mr);
-
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
- struct hfi1_sge *isge, struct ib_sge *sge, int acc);
-
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
- u32 len, u64 vaddr, u32 rkey, int acc);
-
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
-
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask,
- struct ib_udata *udata);
-
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int hfi1_destroy_srq(struct ib_srq *ibsrq);
-
-int hfi1_cq_init(struct hfi1_devdata *dd);
-
-void hfi1_cq_exit(struct hfi1_devdata *dd);
-
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int sig);
-
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *hfi1_create_cq(
- struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-
-int hfi1_destroy_cq(struct ib_cq *ibcq);
-
-int hfi1_req_notify_cq(
- struct ib_cq *ibcq,
- enum ib_cq_notify_flags notify_flags);
-
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_udata *udata);
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
-int hfi1_dereg_mr(struct ib_mr *ibmr);
+void hfi1_migrate_qp(struct rvt_qp *qp);
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_entries);
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
-int hfi1_unmap_fmr(struct list_head *fmr_list);
+extern const u32 rc_only_opcode;
+extern const u32 uc_only_opcode;
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void hfi1_get_mr(struct hfi1_mregion *mr)
-{
- atomic_inc(&mr->refcount);
-}
-
-static inline void hfi1_put_mr(struct hfi1_mregion *mr)
+static inline u8 get_opcode(struct hfi1_ib_header *h)
{
- if (unlikely(atomic_dec_and_test(&mr->refcount)))
- complete(&mr->comp);
-}
+ u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
-static inline void hfi1_put_ss(struct hfi1_sge_state *ss)
-{
- while (ss->num_sge) {
- hfi1_put_mr(ss->sge.mr);
- if (--ss->num_sge)
- ss->sge = *ss->sg_list++;
- }
+ if (lnh == IB_LNH_IBA_LOCAL)
+ return be32_to_cpu(h->u.oth.bth[0]) >> 24;
+ else
+ return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-void hfi1_release_mmap_info(struct kref *ref);
-
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size,
- struct ib_ucontext *context,
- void *obj);
-
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
- u32 size, void *obj);
-
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only);
-
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
- int has_grh, struct hfi1_qp *qp, u32 bth0);
+ int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
- u32 bth0, u32 bth2, int middle);
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+ u32 bth0, u32 bth2, int middle,
+ struct hfi1_pkt_state *ps);
-void hfi1_do_send(struct work_struct *work);
+void _hfi1_do_send(struct work_struct *work);
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_do_send(struct rvt_qp *qp);
+
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct hfi1_qp *qp, int is_fecn);
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);
-int hfi1_make_rc_req(struct hfi1_qp *qp);
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-int hfi1_make_uc_req(struct hfi1_qp *qp);
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-int hfi1_make_ud_req(struct hfi1_qp *qp);
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
int hfi1_register_ib_device(struct hfi1_devdata *);
@@ -1107,24 +469,42 @@ void hfi1_ib_rcv(struct hfi1_packet *packet);
unsigned hfi1_get_npkeys(struct hfi1_devdata *);
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5);
+int hfi1_wss_init(void);
+void hfi1_wss_exit(void);
+
+/* platform specific: return the lowest level cache (llc) size, in KiB */
+static inline int wss_llc_size(void)
+{
+ /* assume that the boot CPU value is universal for all CPUs */
+ return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static inline void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+ /*
+ * Use the only available X64 cacheless copy. Add a __user cast
+ * to quiet sparse. The src agument is already in the kernel so
+ * there are no security issues. The extra fault recovery machinery
+ * is not invoked.
+ */
+ __copy_user_nocache(dst, (void __user *)src, n, 0);
+}
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
extern const u8 hdr_len_by_opcode[];
-extern const int ib_hfi1_state_ops[];
+extern const int ib_rvt_state_ops[];
extern __be64 ib_hfi1_sys_image_guid; /* in network order */
-extern unsigned int hfi1_lkey_table_size;
-
extern unsigned int hfi1_max_cqes;
extern unsigned int hfi1_max_cqs;
@@ -1145,8 +525,8 @@ extern unsigned int hfi1_max_srq_sges;
extern unsigned int hfi1_max_srq_wrs;
-extern const u32 ib_hfi1_rnr_table[];
+extern unsigned short piothreshold;
-extern struct ib_dma_mapping_ops hfi1_dma_mapping_ops;
+extern const u32 ib_hfi1_rnr_table[];
#endif /* HFI1_VERBS_H */
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c
new file mode 100644
index 000000000000..bc95c4112c61
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs_txreq.h"
+#include "qp.h"
+#include "trace.h"
+
+#define TXREQ_LEN 24
+
+void hfi1_put_txreq(struct verbs_txreq *tx)
+{
+ struct hfi1_ibdev *dev;
+ struct rvt_qp *qp;
+ unsigned long flags;
+ unsigned int seq;
+ struct hfi1_qp_priv *priv;
+
+ qp = tx->qp;
+ dev = to_idev(qp->ibqp.device);
+
+ if (tx->mr)
+ rvt_put_mr(tx->mr);
+
+ sdma_txclean(dd_from_dev(dev), &tx->txreq);
+
+ /* Free verbs_txreq and return to slab cache */
+ kmem_cache_free(dev->verbs_txreq_cache, tx);
+
+ do {
+ seq = read_seqbegin(&dev->iowait_lock);
+ if (!list_empty(&dev->txwait)) {
+ struct iowait *wait;
+
+ write_seqlock_irqsave(&dev->iowait_lock, flags);
+ wait = list_first_entry(&dev->txwait, struct iowait,
+ list);
+ qp = iowait_to_qp(wait);
+ priv = qp->priv;
+ list_del_init(&priv->s_iowait.list);
+ /* refcount held until actual wake up */
+ write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
+ break;
+ }
+ } while (read_seqretry(&dev->iowait_lock, seq));
+}
+
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+ struct rvt_qp *qp)
+{
+ struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ write_seqlock(&dev->iowait_lock);
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+ struct hfi1_qp_priv *priv;
+
+ tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+ if (tx)
+ goto out;
+ priv = qp->priv;
+ if (list_empty(&priv->s_iowait.list)) {
+ dev->n_txwait++;
+ qp->s_flags |= RVT_S_WAIT_TX;
+ list_add_tail(&priv->s_iowait.list, &dev->txwait);
+ trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
+ atomic_inc(&qp->refcount);
+ }
+ qp->s_flags &= ~RVT_S_BUSY;
+ }
+out:
+ write_sequnlock(&dev->iowait_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return tx;
+}
+
+static void verbs_txreq_kmem_cache_ctor(void *obj)
+{
+ struct verbs_txreq *tx = (struct verbs_txreq *)obj;
+
+ memset(tx, 0, sizeof(*tx));
+}
+
+int verbs_txreq_init(struct hfi1_ibdev *dev)
+{
+ char buf[TXREQ_LEN];
+ struct hfi1_devdata *dd = dd_from_dev(dev);
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
+ dev->verbs_txreq_cache = kmem_cache_create(buf,
+ sizeof(struct verbs_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ verbs_txreq_kmem_cache_ctor);
+ if (!dev->verbs_txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void verbs_txreq_exit(struct hfi1_ibdev *dev)
+{
+ kmem_cache_destroy(dev->verbs_txreq_cache);
+ dev->verbs_txreq_cache = NULL;
+}
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h
new file mode 100644
index 000000000000..1cf69b2fe4a5
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_VERBS_TXREQ_H
+#define HFI1_VERBS_TXREQ_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "verbs.h"
+#include "sdma_txreq.h"
+#include "iowait.h"
+
+struct verbs_txreq {
+ struct hfi1_pio_header phdr;
+ struct sdma_txreq txreq;
+ struct rvt_qp *qp;
+ struct rvt_swqe *wqe;
+ struct rvt_mregion *mr;
+ struct rvt_sge_state *ss;
+ struct sdma_engine *sde;
+ struct send_context *psc;
+ u16 hdr_dwords;
+};
+
+struct hfi1_ibdev;
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+ struct rvt_qp *qp);
+
+static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
+ struct rvt_qp *qp)
+{
+ struct verbs_txreq *tx;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ /* call slow path to get the lock */
+ tx = __get_txreq(dev, qp);
+ if (IS_ERR(tx))
+ return tx;
+ }
+ tx->qp = qp;
+ tx->mr = NULL;
+ tx->sde = priv->s_sde;
+ tx->psc = priv->s_sendcontext;
+ /* so that we can test if the sdma decriptors are there */
+ tx->txreq.num_desc = 0;
+ return tx;
+}
+
+static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
+{
+ return &tx->txreq;
+}
+
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+{
+ struct sdma_txreq *stx;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ stx = iowait_get_txhead(&priv->s_iowait);
+ if (stx)
+ return container_of(stx, struct verbs_txreq, txreq);
+ return NULL;
+}
+
+void hfi1_put_txreq(struct verbs_txreq *tx);
+int verbs_txreq_init(struct hfi1_ibdev *dev);
+void verbs_txreq_exit(struct hfi1_ibdev *dev);
+
+#endif /* HFI1_VERBS_TXREQ_H */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 02ac3000ee3c..8156e3c9239c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1236,6 +1236,8 @@ enum mlx5_cap_type {
MLX5_CAP_FLOW_TABLE,
MLX5_CAP_ESWITCH_FLOW_TABLE,
MLX5_CAP_ESWITCH,
+ MLX5_CAP_RESERVED,
+ MLX5_CAP_VECTOR_CALC,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1298,6 +1300,10 @@ enum mlx5_cap_type {
#define MLX5_CAP_ODP(mdev, cap)\
MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
+#define MLX5_CAP_VECTOR_CALC(mdev, cap) \
+ MLX5_GET(vector_calc_cap, \
+ mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a954465b2bf..dcd5ac8d3b14 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -613,7 +613,10 @@ struct mlx5_pas {
};
enum port_state_policy {
- MLX5_AAA_000
+ MLX5_POLICY_DOWN = 0,
+ MLX5_POLICY_UP = 1,
+ MLX5_POLICY_FOLLOW = 2,
+ MLX5_POLICY_INVALID = 0xffffffff
};
enum phy_port_state {
@@ -706,8 +709,7 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
int mlx5_cmd_status_to_err_v2(void *ptr);
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
- enum mlx5_cap_mode cap_mode);
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int out_size);
int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e52730e01ed6..c15b8a864937 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -618,6 +618,33 @@ struct mlx5_ifc_odp_cap_bits {
u8 reserved_at_e0[0x720];
};
+struct mlx5_ifc_calc_op {
+ u8 reserved_at_0[0x10];
+ u8 reserved_at_10[0x9];
+ u8 op_swap_endianness[0x1];
+ u8 op_min[0x1];
+ u8 op_xor[0x1];
+ u8 op_or[0x1];
+ u8 op_and[0x1];
+ u8 op_max[0x1];
+ u8 op_add[0x1];
+};
+
+struct mlx5_ifc_vector_calc_cap_bits {
+ u8 calc_matrix[0x1];
+ u8 reserved_at_1[0x1f];
+ u8 reserved_at_20[0x8];
+ u8 max_vec_count[0x8];
+ u8 reserved_at_30[0xd];
+ u8 max_chunk_size[0x3];
+ struct mlx5_ifc_calc_op calc0;
+ struct mlx5_ifc_calc_op calc1;
+ struct mlx5_ifc_calc_op calc2;
+ struct mlx5_ifc_calc_op calc3;
+
+ u8 reserved_at_e0[0x720];
+};
+
enum {
MLX5_WQ_TYPE_LINKED_LIST = 0x0,
MLX5_WQ_TYPE_CYCLIC = 0x1,
@@ -784,7 +811,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 cd[0x1];
u8 reserved_at_22c[0x1];
u8 apm[0x1];
- u8 reserved_at_22e[0x2];
+ u8 vector_calc[0x1];
+ u8 reserved_at_22f[0x1];
u8 imaicl[0x1];
u8 reserved_at_231[0x4];
u8 qkv[0x1];
@@ -1954,6 +1982,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
+ struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
u8 reserved_at_0[0x8000];
};
@@ -3681,6 +3710,12 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits {
u8 pkey_index[0x10];
};
+enum {
+ MLX5_HCA_VPORT_SEL_PORT_GUID = 1 << 0,
+ MLX5_HCA_VPORT_SEL_NODE_GUID = 1 << 1,
+ MLX5_HCA_VPORT_SEL_STATE_POLICY = 1 << 2,
+};
+
struct mlx5_ifc_query_hca_vport_gid_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index a9f2bcc98cab..bd93e6323603 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -93,6 +93,11 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
- u8 port_num, void *out, size_t out_sz);
+ int vf, u8 port_num, void *out,
+ size_t out_sz);
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ int vf,
+ struct mlx5_hca_vport_context *req);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index be693b34662f..009c85adae4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1176,6 +1176,9 @@ struct net_device_ops {
struct nlattr *port[]);
int (*ndo_get_vf_port)(struct net_device *dev,
int vf, struct sk_buff *skb);
+ int (*ndo_set_vf_guid)(struct net_device *dev,
+ int vf, u64 guid,
+ int guid_type);
int (*ndo_set_vf_rss_query_en)(
struct net_device *dev,
int vf, bool setting);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3a03c1d18afa..fb2cef4e9747 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -56,6 +56,7 @@
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/if_link.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
@@ -97,6 +98,11 @@ enum rdma_node_type {
RDMA_NODE_USNIC_UDP,
};
+enum {
+ /* set the local administered indication */
+ IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2,
+};
+
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
@@ -213,6 +219,7 @@ enum ib_device_cap_flags {
IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
+ IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33),
};
enum ib_signature_prot_cap {
@@ -274,7 +281,7 @@ struct ib_device_attr {
u32 hw_ver;
int max_qp;
int max_qp_wr;
- int device_cap_flags;
+ u64 device_cap_flags;
int max_sge;
int max_sge_rd;
int max_cq;
@@ -490,6 +497,7 @@ union rdma_protocol_stats {
| RDMA_CORE_CAP_OPA_MAD)
struct ib_port_attr {
+ u64 subnet_prefix;
enum ib_port_state state;
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
@@ -509,6 +517,7 @@ struct ib_port_attr {
u8 active_width;
u8 active_speed;
u8 phys_state;
+ bool grh_required;
};
enum ib_device_modify_flags {
@@ -614,6 +623,7 @@ enum {
};
#define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF)
+#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000)
enum ib_ah_flags {
IB_AH_GRH = 1
@@ -1860,6 +1870,14 @@ struct ib_device {
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
void (*drain_rq)(struct ib_qp *qp);
void (*drain_sq)(struct ib_qp *qp);
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+ int state);
+ int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *ivf);
+ int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_stats *stats);
+ int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+ int type);
struct ib_dma_mapping_ops *dma_ops;
@@ -2303,6 +2321,15 @@ int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid,
struct ib_gid_attr *attr);
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+ int state);
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *info);
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_stats *stats);
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+ int type);
+
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index a0fa975cd1c1..2b95c2c336eb 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -97,7 +97,7 @@
#define OPA_LINKDOWN_REASON_WIDTH_POLICY 41
/* 42-48 reserved */
#define OPA_LINKDOWN_REASON_DISCONNECTED 49
-#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50
+#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50
#define OPA_LINKDOWN_REASON_NOT_INSTALLED 51
#define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52
/* 53 reserved */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
new file mode 100644
index 000000000000..a8696551abb1
--- /dev/null
+++ b/include/rdma/rdma_vt.h
@@ -0,0 +1,481 @@
+#ifndef DEF_RDMA_VT_H
+#define DEF_RDMA_VT_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Structure that low level drivers will populate in order to register with the
+ * rdmavt layer.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_mr.h>
+#include <rdma/rdmavt_qp.h>
+
+#define RVT_MAX_PKEY_VALUES 16
+
+struct rvt_ibport {
+ struct rvt_qp __rcu *qp[2];
+ struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
+ struct rb_root mcast_tree;
+ spinlock_t lock; /* protect changes in this struct */
+
+ /* non-zero when timer is set */
+ unsigned long mkey_lease_timeout;
+ unsigned long trap_timeout;
+ __be64 gid_prefix; /* in network order */
+ __be64 mkey;
+ u64 tid;
+ u32 port_cap_flags;
+ u32 pma_sample_start;
+ u32 pma_sample_interval;
+ __be16 pma_counter_select[5];
+ u16 pma_tag;
+ u16 mkey_lease_period;
+ u16 sm_lid;
+ u8 sm_sl;
+ u8 mkeyprot;
+ u8 subnet_timeout;
+ u8 vl_high_limit;
+
+ /*
+ * Driver is expected to keep these up to date. These
+ * counters are informational only and not required to be
+ * completely accurate.
+ */
+ u64 n_rc_resends;
+ u64 n_seq_naks;
+ u64 n_rdma_seq;
+ u64 n_rnr_naks;
+ u64 n_other_naks;
+ u64 n_loop_pkts;
+ u64 n_pkt_drops;
+ u64 n_vl15_dropped;
+ u64 n_rc_timeouts;
+ u64 n_dmawait;
+ u64 n_unaligned;
+ u64 n_rc_dupreq;
+ u64 n_rc_seqnak;
+ u16 pkey_violations;
+ u16 qkey_violations;
+ u16 mkey_violations;
+
+ /* Hot-path per CPU counters to avoid cacheline trading to update */
+ u64 z_rc_acks;
+ u64 z_rc_qacks;
+ u64 z_rc_delayed_comp;
+ u64 __percpu *rc_acks;
+ u64 __percpu *rc_qacks;
+ u64 __percpu *rc_delayed_comp;
+
+ void *priv; /* driver private data */
+
+ /*
+ * The pkey table is allocated and maintained by the driver. Drivers
+ * need to have access to this before registering with rdmav. However
+ * rdmavt will need access to it so drivers need to proviee this during
+ * the attach port API call.
+ */
+ u16 *pkey_table;
+
+ struct rvt_ah *sm_ah;
+};
+
+#define RVT_CQN_MAX 16 /* maximum length of cq name */
+
+/*
+ * Things that are driver specific, module parameters in hfi1 and qib
+ */
+struct rvt_driver_params {
+ struct ib_device_attr props;
+
+ /*
+ * Anything driver specific that is not covered by props
+ * For instance special module parameters. Goes here.
+ */
+ unsigned int lkey_table_size;
+ unsigned int qp_table_size;
+ int qpn_start;
+ int qpn_inc;
+ int qpn_res_start;
+ int qpn_res_end;
+ int nports;
+ int npkeys;
+ u8 qos_shift;
+ char cq_name[RVT_CQN_MAX];
+ int node;
+ int max_rdma_atomic;
+ int psn_mask;
+ int psn_shift;
+ int psn_modify_mask;
+ u32 core_cap_flags;
+ u32 max_mad_size;
+};
+
+/* Protection domain */
+struct rvt_pd {
+ struct ib_pd ibpd;
+ int user; /* non-zero if created from user space */
+};
+
+/* Address handle */
+struct rvt_ah {
+ struct ib_ah ibah;
+ struct ib_ah_attr attr;
+ atomic_t refcount;
+ u8 vl;
+ u8 log_pmtu;
+};
+
+struct rvt_dev_info;
+struct rvt_swqe;
+struct rvt_driver_provided {
+ /*
+ * Which functions are required depends on which verbs rdmavt is
+ * providing and which verbs the driver is overriding. See
+ * check_support() for details.
+ */
+
+ /* Passed to ib core registration. Callback to create syfs files */
+ int (*port_callback)(struct ib_device *, u8, struct kobject *);
+
+ /*
+ * Returns a string to represent the device for which is being
+ * registered. This is primarily used for error and debug messages on
+ * the console.
+ */
+ const char * (*get_card_name)(struct rvt_dev_info *rdi);
+
+ /*
+ * Returns a pointer to the undelying hardware's PCI device. This is
+ * used to display information as to what hardware is being referenced
+ * in an output message
+ */
+ struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
+
+ /*
+ * Allocate a private queue pair data structure for driver specific
+ * information which is opaque to rdmavt.
+ */
+ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ gfp_t gfp);
+
+ /*
+ * Free the driver's private qp structure.
+ */
+ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+
+ /*
+ * Inform the driver the particular qp in quesiton has been reset so
+ * that it can clean up anything it needs to.
+ */
+ void (*notify_qp_reset)(struct rvt_qp *qp);
+
+ /*
+ * Give the driver a notice that there is send work to do. It is up to
+ * the driver to generally push the packets out, this just queues the
+ * work with the driver. There are two variants here. The no_lock
+ * version requires the s_lock not to be held. The other assumes the
+ * s_lock is held.
+ */
+ void (*schedule_send)(struct rvt_qp *qp);
+ void (*schedule_send_no_lock)(struct rvt_qp *qp);
+
+ /*
+ * Sometimes rdmavt needs to kick the driver's send progress. That is
+ * done by this call back.
+ */
+ void (*do_send)(struct rvt_qp *qp);
+
+ /*
+ * Get a path mtu from the driver based on qp attributes.
+ */
+ int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_attr *attr);
+
+ /*
+ * Notify driver that it needs to flush any outstanding IO requests that
+ * are waiting on a qp.
+ */
+ void (*flush_qp_waiters)(struct rvt_qp *qp);
+
+ /*
+ * Notify driver to stop its queue of sending packets. Nothing else
+ * should be posted to the queue pair after this has been called.
+ */
+ void (*stop_send_queue)(struct rvt_qp *qp);
+
+ /*
+ * Have the drivr drain any in progress operations
+ */
+ void (*quiesce_qp)(struct rvt_qp *qp);
+
+ /*
+ * Inform the driver a qp has went to error state.
+ */
+ void (*notify_error_qp)(struct rvt_qp *qp);
+
+ /*
+ * Get an MTU for a qp.
+ */
+ u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ u32 pmtu);
+ /*
+ * Convert an mtu to a path mtu
+ */
+ int (*mtu_to_path_mtu)(u32 mtu);
+
+ /*
+ * Get the guid of a port in big endian byte order
+ */
+ int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+ int guid_index, __be64 *guid);
+
+ /*
+ * Query driver for the state of the port.
+ */
+ int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num,
+ struct ib_port_attr *props);
+
+ /*
+ * Tell driver to shutdown a port
+ */
+ int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num);
+
+ /* Tell driver to send a trap for changed port capabilities */
+ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num);
+
+ /*
+ * The following functions can be safely ignored completely. Any use of
+ * these is checked for NULL before blindly calling. Rdmavt should also
+ * be functional if drivers omit these.
+ */
+
+ /* Called to inform the driver that all qps should now be freed. */
+ unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
+
+ /* Driver specific AH validation */
+ int (*check_ah)(struct ib_device *, struct ib_ah_attr *);
+
+ /* Inform the driver a new AH has been created */
+ void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
+ struct rvt_ah *);
+
+ /* Let the driver pick the next queue pair number*/
+ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+ enum ib_qp_type type, u8 port_num, gfp_t gfp);
+
+ /* Determine if its safe or allowed to modify the qp */
+ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+ /* Driver specific QP modification/notification-of */
+ void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+ /* Driver specific work request checking */
+ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+ /* Notify driver a mad agent has been created */
+ void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
+
+ /* Notify driver a mad agent has been removed */
+ void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
+
+};
+
+struct rvt_dev_info {
+ struct ib_device ibdev; /* Keep this first. Nothing above here */
+
+ /*
+ * Prior to calling for registration the driver will be responsible for
+ * allocating space for this structure.
+ *
+ * The driver will also be responsible for filling in certain members of
+ * dparms.props. The driver needs to fill in dparms exactly as it would
+ * want values reported to a ULP. This will be returned to the caller
+ * in rdmavt's device. The driver should also therefore refrain from
+ * modifying this directly after registration with rdmavt.
+ */
+
+ /* Driver specific properties */
+ struct rvt_driver_params dparms;
+
+ struct rvt_mregion __rcu *dma_mr;
+ struct rvt_lkey_table lkey_table;
+
+ /* Driver specific helper functions */
+ struct rvt_driver_provided driver_f;
+
+ /* Internal use */
+ int n_pds_allocated;
+ spinlock_t n_pds_lock; /* Protect pd allocated count */
+
+ int n_ahs_allocated;
+ spinlock_t n_ahs_lock; /* Protect ah allocated count */
+
+ u32 n_srqs_allocated;
+ spinlock_t n_srqs_lock; /* Protect srqs allocated count */
+
+ int flags;
+ struct rvt_ibport **ports;
+
+ /* QP */
+ struct rvt_qp_ibdev *qp_dev;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ u32 n_rc_qps; /* number of RC QPs allocated for device */
+ u32 busy_jiffies; /* timeout scaling based on RC QP count */
+ spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */
+
+ /* memory maps */
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock; /* protect mmap_offset */
+ u32 mmap_offset;
+ spinlock_t pending_lock; /* protect pending mmap list */
+
+ /* CQ */
+ struct kthread_worker *worker; /* per device cq worker */
+ u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock; /* protect count of in use cqs */
+
+ /* Multicast */
+ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+
+};
+
+static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct rvt_pd, ibpd);
+}
+
+static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct rvt_ah, ibah);
+}
+
+static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct rvt_dev_info, ibdev);
+}
+
+static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct rvt_srq, ibsrq);
+}
+
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
+static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
+{
+ /*
+ * All ports have same number of pkeys.
+ */
+ return rdi->dparms.npkeys;
+}
+
+/*
+ * Return the indexed PKEY from the port PKEY table.
+ */
+static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
+ int port_index,
+ unsigned index)
+{
+ if (index >= rvt_get_npkeys(rdi))
+ return 0;
+ else
+ return rdi->ports[port_index]->pkey_table[index];
+}
+
+/**
+ * rvt_lookup_qpn - return the QP with the given QPN
+ * @ibp: the ibport
+ * @qpn: the QP number to look up
+ *
+ * The caller must hold the rcu_read_lock(), and keep the lock until
+ * the returned qp is no longer in use.
+ */
+/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */
+static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
+ struct rvt_ibport *rvp,
+ u32 qpn) __must_hold(RCU)
+{
+ struct rvt_qp *qp = NULL;
+
+ if (unlikely(qpn <= 1)) {
+ qp = rcu_dereference(rvp->qp[qpn]);
+ } else {
+ u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
+
+ for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next))
+ if (qp->ibqp.qp_num == qpn)
+ break;
+ }
+ return qp;
+}
+
+struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
+int rvt_register_device(struct rvt_dev_info *rvd);
+void rvt_unregister_device(struct rvt_dev_info *rvd);
+int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
+ int port_index, u16 *pkey_table);
+int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
+ u32 len, u64 vaddr, u32 rkey, int acc);
+int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+ struct rvt_sge *isge, struct ib_sge *sge, int acc);
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid);
+
+#endif /* DEF_RDMA_VT_H */
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
new file mode 100644
index 000000000000..51fd00b243d0
--- /dev/null
+++ b/include/rdma/rdmavt_cq.h
@@ -0,0 +1,99 @@
+#ifndef DEF_RDMAVT_INCCQ_H
+#define DEF_RDMAVT_INCCQ_H
+
+/*
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <rdma/ib_user_verbs.h>
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1)
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct rvt_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ union {
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
+ struct ib_wc kqueue[0];
+ };
+};
+
+/*
+ * The completion queue structure.
+ */
+struct rvt_cq {
+ struct ib_cq ibcq;
+ struct kthread_work comptask;
+ spinlock_t lock; /* protect changes in this struct */
+ u8 notify;
+ u8 triggered;
+ struct rvt_dev_info *rdi;
+ struct rvt_cq_wc *queue;
+ struct rvt_mmap_info *ip;
+};
+
+static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct rvt_cq, ibcq);
+}
+
+void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+
+#endif /* DEF_RDMAVT_INCCQH */
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
new file mode 100644
index 000000000000..5edffdca8c53
--- /dev/null
+++ b/include/rdma/rdmavt_mr.h
@@ -0,0 +1,139 @@
+#ifndef DEF_RDMAVT_INCMR_H
+#define DEF_RDMAVT_INCMR_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once
+ * drivers no longer need access to the MR directly.
+ */
+
+/*
+ * A segment is a linear region of low physical memory.
+ * Used by the verbs layer.
+ */
+struct rvt_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of rvt_segs that fit in a page. */
+#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg))
+
+struct rvt_segarray {
+ struct rvt_seg segs[RVT_SEGSZ];
+};
+
+struct rvt_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of rvt_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ u8 page_shift; /* 0 - non unform/non powerof2 sizes */
+ u8 lkey_published; /* in global table */
+ struct completion comp; /* complete when refcount goes to zero */
+ atomic_t refcount;
+ struct rvt_segarray *map[0]; /* the segments */
+};
+
+#define RVT_MAX_LKEY_TABLE_BITS 23
+
+struct rvt_lkey_table {
+ spinlock_t lock; /* protect changes in this struct */
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
+ u32 max; /* size of the table */
+ struct rvt_mregion __rcu **table;
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct rvt_sge {
+ struct rvt_mregion *mr;
+ void *vaddr; /* kernel virtual address of segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+struct rvt_sge_state {
+ struct rvt_sge *sg_list; /* next SGE to be used if any */
+ struct rvt_sge sge; /* progress state for the current SGE */
+ u32 total_len;
+ u8 num_sge;
+};
+
+static inline void rvt_put_mr(struct rvt_mregion *mr)
+{
+ if (unlikely(atomic_dec_and_test(&mr->refcount)))
+ complete(&mr->comp);
+}
+
+static inline void rvt_get_mr(struct rvt_mregion *mr)
+{
+ atomic_inc(&mr->refcount);
+}
+
+static inline void rvt_put_ss(struct rvt_sge_state *ss)
+{
+ while (ss->num_sge) {
+ rvt_put_mr(ss->sge.mr);
+ if (--ss->num_sge)
+ ss->sge = *ss->sg_list++;
+ }
+}
+
+#endif /* DEF_RDMAVT_INCMRH */
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
new file mode 100644
index 000000000000..497e59065c2c
--- /dev/null
+++ b/include/rdma/rdmavt_qp.h
@@ -0,0 +1,446 @@
+#ifndef DEF_RDMAVT_INCQP_H
+#define DEF_RDMAVT_INCQP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_verbs.h>
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define RVT_R_WRID_VALID 0
+#define RVT_R_REWIND_SGE 1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define RVT_R_REUSE_SGE 0x01
+#define RVT_R_RDMAR_SEQ 0x02
+#define RVT_R_RSP_NAK 0x04
+#define RVT_R_RSP_SEND 0x08
+#define RVT_R_COMM_EST 0x10
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * RVT_S_BUSY - send tasklet is processing the QP
+ * RVT_S_TIMER - the RC retry timer is active
+ * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ * before processing the next SWQE
+ * RVT_S_WAIT_RNR - waiting for RNR timeout
+ * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA
+ * RVT_S_WAIT_PIO - waiting for a send buffer to be available
+ * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets
+ * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
+ * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * RVT_S_WAIT_KMEM - waiting for kernel memory to be available
+ * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ * RVT_S_ECN - a BECN was queued to the send engine
+ */
+#define RVT_S_SIGNAL_REQ_WR 0x0001
+#define RVT_S_BUSY 0x0002
+#define RVT_S_TIMER 0x0004
+#define RVT_S_RESP_PENDING 0x0008
+#define RVT_S_ACK_PENDING 0x0010
+#define RVT_S_WAIT_FENCE 0x0020
+#define RVT_S_WAIT_RDMAR 0x0040
+#define RVT_S_WAIT_RNR 0x0080
+#define RVT_S_WAIT_SSN_CREDIT 0x0100
+#define RVT_S_WAIT_DMA 0x0200
+#define RVT_S_WAIT_PIO 0x0400
+#define RVT_S_WAIT_PIO_DRAIN 0x0800
+#define RVT_S_WAIT_TX 0x1000
+#define RVT_S_WAIT_DMA_DESC 0x2000
+#define RVT_S_WAIT_KMEM 0x4000
+#define RVT_S_WAIT_PSN 0x8000
+#define RVT_S_WAIT_ACK 0x10000
+#define RVT_S_SEND_ONE 0x20000
+#define RVT_S_UNLIMITED_CREDIT 0x40000
+#define RVT_S_AHG_VALID 0x80000
+#define RVT_S_AHG_CLEAR 0x100000
+#define RVT_S_ECN 0x200000
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
+ RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \
+ RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \
+ RVT_S_WAIT_PSN | RVT_S_WAIT_ACK)
+
+#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+
+/* Number of bits to pay attention to in the opcode for checking qp type */
+#define RVT_OPCODE_QP_MASK 0xE0
+
+/* Flags for checking QP state (see ib_rvt_state_ops[]) */
+#define RVT_POST_SEND_OK 0x01
+#define RVT_POST_RECV_OK 0x02
+#define RVT_PROCESS_RECV_OK 0x04
+#define RVT_PROCESS_SEND_OK 0x08
+#define RVT_PROCESS_NEXT_SEND_OK 0x10
+#define RVT_FLUSH_SEND 0x20
+#define RVT_FLUSH_RECV 0x40
+#define RVT_PROCESS_OR_FLUSH_SEND \
+ (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct rvt_swqe {
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_ud_wr ud_wr;
+ struct ib_reg_wr reg_wr;
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ };
+ u32 psn; /* first packet sequence number */
+ u32 lpsn; /* last packet sequence number */
+ u32 ssn; /* send sequence number */
+ u32 length; /* total length of data in sg_list */
+ struct rvt_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct rvt_rwqe {
+ u64 wr_id;
+ u8 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct rvt_rwq {
+ u32 head; /* new work requests posted to the head */
+ u32 tail; /* receives pull requests from here. */
+ struct rvt_rwqe wq[0];
+};
+
+struct rvt_rq {
+ struct rvt_rwq *wq;
+ u32 size; /* size of RWQE array */
+ u8 max_sge;
+ /* protect changes in this struct */
+ spinlock_t lock ____cacheline_aligned_in_smp;
+};
+
+/*
+ * This structure is used by rvt_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct rvt_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ unsigned size;
+};
+
+#define RVT_MAX_RDMA_ATOMIC 16
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct rvt_ack_entry {
+ u8 opcode;
+ u8 sent;
+ u32 psn;
+ u32 lpsn;
+ union {
+ struct rvt_sge rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+#define RC_QP_SCALING_INTERVAL 5
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock in that order
+ * which only happens in modify_qp() or changing the QP 'state'.
+ */
+struct rvt_qp {
+ struct ib_qp ibqp;
+ void *priv; /* Driver private data */
+ /* read mostly fields above and below */
+ struct ib_ah_attr remote_ah_attr;
+ struct ib_ah_attr alt_ah_attr;
+ struct rvt_qp __rcu *next; /* link list for QPN hash table */
+ struct rvt_swqe *s_wq; /* send work queue */
+ struct rvt_mmap_info *ip;
+
+ unsigned long timeout_jiffies; /* computed from timeout */
+
+ enum ib_mtu path_mtu;
+ int srate_mbps; /* s_srate (below) converted to Mbit/s */
+ pid_t pid; /* pid for user mode QPs */
+ u32 remote_qpn;
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_ahgpsn; /* set to the psn in the copy of the header */
+
+ u16 pmtu; /* decoded from path_mtu */
+ u8 log_pmtu; /* shift for pmtu */
+ u8 state; /* QP state */
+ u8 allowed_ops; /* high order bits of allowed opcodes */
+ u8 qp_access_flags;
+ u8 alt_timeout; /* Alternate path timeout for this QP */
+ u8 timeout; /* Timeout for this QP */
+ u8 s_srate;
+ u8 s_mig_state;
+ u8 port_num;
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_draining;
+
+ /* start of read/write fields */
+ atomic_t refcount ____cacheline_aligned_in_smp;
+ wait_queue_head_t wait;
+
+ struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1]
+ ____cacheline_aligned_in_smp;
+ struct rvt_sge_state s_rdma_read_sge;
+
+ spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
+ u32 r_psn; /* expected rcv packet sequence number */
+ unsigned long r_aflags;
+ u64 r_wr_id; /* ID for current receive WQE */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_msn; /* message sequence number */
+
+ u8 r_state; /* opcode of last packet received */
+ u8 r_flags;
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+
+ struct list_head rspwait; /* link for waiting to respond */
+
+ struct rvt_sge_state r_sge; /* current receive data */
+ struct rvt_rq r_rq; /* receive work queue */
+
+ /* post send line */
+ spinlock_t s_hlock ____cacheline_aligned_in_smp;
+ u32 s_head; /* new entries added here */
+ u32 s_next_psn; /* PSN for next request */
+ u32 s_avail; /* number of entries avail */
+ u32 s_ssn; /* SSN of tail entry */
+
+ spinlock_t s_lock ____cacheline_aligned_in_smp;
+ u32 s_flags;
+ struct rvt_sge_state *s_cur_sge;
+ struct rvt_swqe *s_wqe;
+ struct rvt_sge_state s_sge; /* current send request data */
+ struct rvt_mregion *s_rdma_mr;
+ u32 s_cur_size; /* size of send packet in bytes */
+ u32 s_len; /* total length of s_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
+ u32 s_last_psn; /* last response PSN processed */
+ u32 s_sending_psn; /* lowest PSN that is being sent */
+ u32 s_sending_hpsn; /* highest PSN that is being sent */
+ u32 s_psn; /* current packet sequence number */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_acked; /* last un-ACK'ed entry */
+ u32 s_last; /* last completed entry */
+ u32 s_lsn; /* limit sequence number (credit) */
+ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
+ u16 s_rdma_ack_cnt;
+ s8 s_ahgidx;
+ u8 s_state; /* opcode of last packet sent */
+ u8 s_ack_state; /* opcode of packet to ACK */
+ u8 s_nak_state; /* non-zero if NAK is pending */
+ u8 r_nak_state; /* non-zero if NAK is pending */
+ u8 s_retry; /* requester retry counter */
+ u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+
+ struct rvt_sge_state s_ack_rdma_sge;
+ struct timer_list s_timer;
+
+ /*
+ * This sge list MUST be last. Do not add anything below here.
+ */
+ struct rvt_sge r_sg_list[0] /* verified SGEs */
+ ____cacheline_aligned_in_smp;
+};
+
+struct rvt_srq {
+ struct ib_srq ibsrq;
+ struct rvt_rq rq;
+ struct rvt_mmap_info *ip;
+ /* send signal when number of RWQEs < limit */
+ u32 limit;
+};
+
+#define RVT_QPN_MAX BIT(24)
+#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
+#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1)
+#define RVT_QPN_MASK 0xFFFFFF
+
+/*
+ * QPN-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct rvt_qpn_map {
+ void *page;
+};
+
+struct rvt_qpn_table {
+ spinlock_t lock; /* protect changes to the qp table */
+ unsigned flags; /* flags for QP0/1 allocated for each port */
+ u32 last; /* last QP number allocated */
+ u32 nmaps; /* size of the map table */
+ u16 limit;
+ u8 incr;
+ /* bit map of free QP numbers other than 0/1 */
+ struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES];
+};
+
+struct rvt_qp_ibdev {
+ u32 qp_table_size;
+ u32 qp_table_bits;
+ struct rvt_qp __rcu **qp_table;
+ spinlock_t qpt_lock; /* qptable lock */
+ struct rvt_qpn_table qpn_table;
+};
+
+/*
+ * There is one struct rvt_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct rvt_mcast_qp.
+ */
+struct rvt_mcast_qp {
+ struct list_head list;
+ struct rvt_qp *qp;
+};
+
+struct rvt_mcast {
+ struct rb_node rb_node;
+ union ib_gid mgid;
+ struct list_head qp_list;
+ wait_queue_head_t wait;
+ atomic_t refcount;
+ int n_attached;
+};
+
+/*
+ * Since struct rvt_swqe is not a fixed size, we can't simply index into
+ * struct rvt_qp.s_wq. This function does the array index computation.
+ */
+static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp,
+ unsigned n)
+{
+ return (struct rvt_swqe *)((char *)qp->s_wq +
+ (sizeof(struct rvt_swqe) +
+ qp->s_max_sge *
+ sizeof(struct rvt_sge)) * n);
+}
+
+/*
+ * Since struct rvt_rwqe is not a fixed size, we can't simply index into
+ * struct rvt_rwq.wq. This function does the array index computation.
+ */
+static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
+{
+ return (struct rvt_rwqe *)
+ ((char *)rq->wq->wq +
+ (sizeof(struct rvt_rwqe) +
+ rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+extern const int ib_rvt_state_ops[];
+
+struct rvt_dev_info;
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
+
+#endif /* DEF_RDMAVT_INCQP_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8e3f88fa5b59..a62a0129d614 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -599,6 +599,8 @@ enum {
*/
IFLA_VF_STATS, /* network device statistics */
IFLA_VF_TRUST, /* Trust VF */
+ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */
+ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */
__IFLA_VF_MAX,
};
@@ -631,6 +633,11 @@ struct ifla_vf_spoofchk {
__u32 setting;
};
+struct ifla_vf_guid {
+ __u32 vf;
+ __u64 guid;
+};
+
enum {
IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
IFLA_VF_LINK_STATE_ENABLE, /* link always up */
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index 288694e422fb..a533cecab14f 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -66,7 +66,7 @@
* The major version changes when data structures change in an incompatible
* way. The driver must be the same for initialization to succeed.
*/
-#define HFI1_USER_SWMAJOR 4
+#define HFI1_USER_SWMAJOR 5
/*
* Minor version differences are always compatible
@@ -93,7 +93,7 @@
#define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/
#define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */
#define HFI1_CAP_NODROP_EGR_FULL (1UL << 9) /* Don't drop on EGR buffs full */
-#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Enable Expected TID caching */
+#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Disable Expected TID caching */
#define HFI1_CAP_PRINT_UNIMPL (1UL << 11) /* Show for unimplemented feats */
#define HFI1_CAP_ALLOW_PERM_JKEY (1UL << 12) /* Allow use of permissive JKEY */
#define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */
@@ -134,6 +134,7 @@
#define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */
#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */
#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */
+#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */
/* separate EPROM commands from normal PSM commands */
#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */
#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */
@@ -147,13 +148,15 @@
#define _HFI1_EVENT_LID_CHANGE_BIT 2
#define _HFI1_EVENT_LMC_CHANGE_BIT 3
#define _HFI1_EVENT_SL2VL_CHANGE_BIT 4
-#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_SL2VL_CHANGE_BIT
+#define _HFI1_EVENT_TID_MMU_NOTIFY_BIT 5
+#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_TID_MMU_NOTIFY_BIT
#define HFI1_EVENT_FROZEN (1UL << _HFI1_EVENT_FROZEN_BIT)
#define HFI1_EVENT_LINKDOWN (1UL << _HFI1_EVENT_LINKDOWN_BIT)
#define HFI1_EVENT_LID_CHANGE (1UL << _HFI1_EVENT_LID_CHANGE_BIT)
#define HFI1_EVENT_LMC_CHANGE (1UL << _HFI1_EVENT_LMC_CHANGE_BIT)
#define HFI1_EVENT_SL2VL_CHANGE (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT)
+#define HFI1_EVENT_TID_MMU_NOTIFY (1UL << _HFI1_EVENT_TID_MMU_NOTIFY_BIT)
/*
* These are the status bits readable (in ASCII form, 64bit value)
@@ -238,11 +241,6 @@ struct hfi1_tid_info {
__u32 tidcnt;
/* length of transfer buffer programmed by this request */
__u32 length;
- /*
- * pointer to bitmap of TIDs used for this call;
- * checked for being large enough at open
- */
- __u64 tidmap;
};
struct hfi1_cmd {
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f7d7b6fec935..6e373d151cad 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -8,6 +8,7 @@ enum {
RDMA_NL_IWCM,
RDMA_NL_RSVD,
RDMA_NL_LS, /* RDMA Local Services */
+ RDMA_NL_I40IW,
RDMA_NL_NUM_CLIENTS
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d2d9e5ebf58e..167883e09317 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1389,6 +1389,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
[IFLA_VF_STATS] = { .type = NLA_NESTED },
[IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
+ [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) },
+ [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1593,6 +1595,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return 0;
}
+static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
+ int guid_type)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
+}
+
+static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
+{
+ if (dev->type != ARPHRD_INFINIBAND)
+ return -EOPNOTSUPP;
+
+ return handle_infiniband_guid(dev, ivt, guid_type);
+}
+
static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -1695,6 +1713,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
+ if (tb[IFLA_VF_IB_NODE_GUID]) {
+ struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
+
+ if (!ops->ndo_set_vf_guid)
+ return -EOPNOTSUPP;
+
+ return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
+ }
+
+ if (tb[IFLA_VF_IB_PORT_GUID]) {
+ struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
+
+ if (!ops->ndo_set_vf_guid)
+ return -EOPNOTSUPP;
+
+ return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
+ }
+
return err;
}